diff --git a/crates/console/prometeu-hal/src/log/log_service.rs b/crates/console/prometeu-hal/src/log/log_service.rs index b5620f65..c796b63a 100644 --- a/crates/console/prometeu-hal/src/log/log_service.rs +++ b/crates/console/prometeu-hal/src/log/log_service.rs @@ -1,16 +1,23 @@ use crate::log::{LogEvent, LogLevel, LogSource}; use std::collections::VecDeque; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::Arc; pub struct LogService { events: VecDeque, capacity: usize, next_seq: u64, - pub logs_count: u32, + pub logs_count: Arc, } impl LogService { pub fn new(capacity: usize) -> Self { - Self { events: VecDeque::with_capacity(capacity), capacity, next_seq: 0, logs_count: 0 } + Self { + events: VecDeque::with_capacity(capacity), + capacity, + next_seq: 0, + logs_count: Arc::new(AtomicU32::new(0)), + } } pub fn log( @@ -35,11 +42,11 @@ impl LogService { msg, }); self.next_seq += 1; - self.logs_count += 1; + self.logs_count.fetch_add(1, Ordering::Relaxed); } pub fn reset_count(&mut self) { - self.logs_count = 0; + self.logs_count.store(0, Ordering::Relaxed); } pub fn get_recent(&self, n: usize) -> Vec { diff --git a/crates/console/prometeu-hal/src/telemetry.rs b/crates/console/prometeu-hal/src/telemetry.rs index c061e98d..08d025bb 100644 --- a/crates/console/prometeu-hal/src/telemetry.rs +++ b/crates/console/prometeu-hal/src/telemetry.rs @@ -1,4 +1,6 @@ use crate::log::{LogLevel, LogService, LogSource}; +use std::sync::atomic::{AtomicU32, AtomicU64, AtomicUsize, Ordering}; +use std::sync::Arc; #[derive(Debug, Clone, Copy, Default)] pub struct TelemetryFrame { @@ -29,6 +31,69 @@ pub struct TelemetryFrame { pub logs_count: u32, } +/// Thread-safe, atomic telemetry storage for real-time monitoring by the host. +/// This follows the push-based model from DEC-0005 to avoid expensive scans or locks. 
+#[derive(Debug, Default)] +pub struct AtomicTelemetry { + pub frame_index: AtomicU64, + pub cycles_used: AtomicU64, + pub cycles_budget: AtomicU64, + pub syscalls: AtomicU32, + pub host_cpu_time_us: AtomicU64, + pub vm_steps: AtomicU32, + pub completed_logical_frames: AtomicU32, + pub violations: AtomicU32, + + // GFX Banks + pub gfx_used_bytes: AtomicUsize, + pub gfx_inflight_bytes: AtomicUsize, + pub gfx_slots_occupied: AtomicU32, + + // Audio Banks + pub audio_used_bytes: AtomicUsize, + pub audio_inflight_bytes: AtomicUsize, + pub audio_slots_occupied: AtomicU32, + + // RAM (Heap) + pub heap_used_bytes: AtomicUsize, + pub heap_max_bytes: AtomicUsize, + + // Log Pressure + pub logs_count: Arc, +} + +impl AtomicTelemetry { + pub fn new(logs_count: Arc) -> Self { + Self { + logs_count, + ..Default::default() + } + } + + /// Snapshots the current atomic state into a TelemetryFrame. + pub fn snapshot(&self) -> TelemetryFrame { + TelemetryFrame { + frame_index: self.frame_index.load(Ordering::Relaxed), + cycles_used: self.cycles_used.load(Ordering::Relaxed), + cycles_budget: self.cycles_budget.load(Ordering::Relaxed), + syscalls: self.syscalls.load(Ordering::Relaxed), + host_cpu_time_us: self.host_cpu_time_us.load(Ordering::Relaxed), + completed_logical_frames: self.completed_logical_frames.load(Ordering::Relaxed), + violations: self.violations.load(Ordering::Relaxed), + gfx_used_bytes: self.gfx_used_bytes.load(Ordering::Relaxed), + gfx_inflight_bytes: self.gfx_inflight_bytes.load(Ordering::Relaxed), + gfx_slots_occupied: self.gfx_slots_occupied.load(Ordering::Relaxed), + audio_used_bytes: self.audio_used_bytes.load(Ordering::Relaxed), + audio_inflight_bytes: self.audio_inflight_bytes.load(Ordering::Relaxed), + audio_slots_occupied: self.audio_slots_occupied.load(Ordering::Relaxed), + heap_used_bytes: self.heap_used_bytes.load(Ordering::Relaxed), + heap_max_bytes: self.heap_max_bytes.load(Ordering::Relaxed), + logs_count: self.logs_count.load(Ordering::Relaxed), + 
vm_steps: self.vm_steps.load(Ordering::Relaxed), + } + } +} + #[derive(Debug, Clone, Copy, Default)] pub struct CertificationConfig { pub enabled: bool, @@ -196,6 +261,7 @@ mod tests { #[test] fn test_certifier_violations() { + let mut ls = LogService::new(10); let config = CertificationConfig { enabled: true, cycles_budget_per_frame: Some(100), @@ -205,7 +271,6 @@ mod tests { ..Default::default() }; let cert = Certifier::new(config); - let mut ls = LogService::new(10); let mut tel = TelemetryFrame::default(); tel.cycles_used = 150; diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime.rs b/crates/console/prometeu-system/src/virtual_machine_runtime.rs index a4bfa41e..1e5edac4 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime.rs @@ -9,9 +9,10 @@ use crate::fs::{FsState, VirtualFS}; use crate::services::memcard::MemcardService; use prometeu_hal::cartridge::AppMode; use prometeu_hal::log::LogService; -use prometeu_hal::telemetry::{CertificationConfig, Certifier, TelemetryFrame}; +use prometeu_hal::telemetry::{AtomicTelemetry, CertificationConfig, Certifier}; use prometeu_vm::VirtualMachine; use std::collections::HashMap; +use std::sync::Arc; use std::time::Instant; pub struct VirtualMachineRuntime { @@ -31,8 +32,7 @@ pub struct VirtualMachineRuntime { pub current_cartridge_app_version: String, pub current_cartridge_app_mode: AppMode, pub logs_written_this_frame: HashMap, - pub telemetry_current: TelemetryFrame, - pub telemetry_last: TelemetryFrame, + pub atomic_telemetry: Arc, pub last_crash_report: Option, pub certifier: Certifier, pub paused: bool, diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/dispatch.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/dispatch.rs index 2504f0b1..bc32d456 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/dispatch.rs +++ 
b/crates/console/prometeu-system/src/virtual_machine_runtime/dispatch.rs @@ -9,6 +9,7 @@ use prometeu_hal::log::{LogLevel, LogSource}; use prometeu_hal::sprite::Sprite; use prometeu_hal::syscalls::Syscall; use prometeu_hal::vm_fault::VmFault; +use std::sync::atomic::Ordering; use prometeu_hal::{ AudioOpStatus, GfxOpStatus, HostContext, HostReturn, NativeInterface, SyscallId, expect_bool, expect_int, @@ -65,7 +66,7 @@ impl NativeInterface for VirtualMachineRuntime { ret: &mut HostReturn, ctx: &mut HostContext, ) -> Result<(), VmFault> { - self.telemetry_current.syscalls += 1; + self.atomic_telemetry.syscalls.fetch_add(1, Ordering::Relaxed); let syscall = Syscall::from_u32(id).ok_or_else(|| { VmFault::Trap(TRAP_INVALID_SYSCALL, format!("Unknown syscall: 0x{:08X}", id)) })?; diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs index 69184441..b8bef76b 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs @@ -7,6 +7,8 @@ use prometeu_hal::log::{LogLevel, LogSource}; impl VirtualMachineRuntime { pub fn new(cap_config: Option) -> Self { let boot_time = Instant::now(); + let log_service = LogService::new(4096); + let atomic_telemetry = Arc::new(AtomicTelemetry::new(Arc::clone(&log_service.logs_count))); let mut os = Self { tick_index: 0, logical_frame_index: 0, @@ -18,14 +20,13 @@ impl VirtualMachineRuntime { memcard: MemcardService::new(), open_files: HashMap::new(), next_handle: 1, - log_service: LogService::new(4096), + log_service, current_app_id: 0, current_cartridge_title: String::new(), current_cartridge_app_version: String::new(), current_cartridge_app_mode: AppMode::Game, logs_written_this_frame: HashMap::new(), - telemetry_current: TelemetryFrame::default(), - telemetry_last: TelemetryFrame::default(), + atomic_telemetry, last_crash_report: None, 
certifier: Certifier::new(cap_config.unwrap_or_default()), paused: false, @@ -99,8 +100,6 @@ impl VirtualMachineRuntime { self.current_cartridge_app_mode = AppMode::Game; self.logs_written_this_frame.clear(); - self.telemetry_current = TelemetryFrame::default(); - self.telemetry_last = TelemetryFrame::default(); self.last_crash_report = None; self.paused = false; diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/tests.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/tests.rs index 293a35a9..59e9d0c4 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/tests.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime/tests.rs @@ -273,10 +273,9 @@ fn reset_clears_cartridge_scoped_runtime_state() { runtime.current_cartridge_app_version = "1.2.3".into(); runtime.current_cartridge_app_mode = AppMode::System; runtime.logs_written_this_frame.insert(42, 3); - runtime.telemetry_current.frame_index = 8; - runtime.telemetry_current.cycles_used = 99; - runtime.telemetry_last.frame_index = 7; - runtime.telemetry_last.completed_logical_frames = 2; + runtime.atomic_telemetry.frame_index.store(8, Ordering::Relaxed); + runtime.atomic_telemetry.cycles_used.store(99, Ordering::Relaxed); + runtime.atomic_telemetry.completed_logical_frames.store(2, Ordering::Relaxed); runtime.last_crash_report = Some(CrashReport::VmPanic { message: "stale".into(), pc: Some(55) }); runtime.paused = true; @@ -298,10 +297,9 @@ fn reset_clears_cartridge_scoped_runtime_state() { assert!(runtime.current_cartridge_app_version.is_empty()); assert_eq!(runtime.current_cartridge_app_mode, AppMode::Game); assert!(runtime.logs_written_this_frame.is_empty()); - assert_eq!(runtime.telemetry_current.frame_index, 0); - assert_eq!(runtime.telemetry_current.cycles_used, 0); - assert_eq!(runtime.telemetry_last.frame_index, 0); - assert_eq!(runtime.telemetry_last.completed_logical_frames, 0); + 
assert_eq!(runtime.atomic_telemetry.frame_index.load(Ordering::Relaxed), 0); + assert_eq!(runtime.atomic_telemetry.cycles_used.load(Ordering::Relaxed), 0); + assert_eq!(runtime.atomic_telemetry.completed_logical_frames.load(Ordering::Relaxed), 0); assert!(runtime.last_crash_report.is_none()); assert!(!runtime.paused); assert!(!runtime.debug_step_request); @@ -331,7 +329,7 @@ fn initialize_vm_failure_clears_previous_identity_and_handles() { runtime.next_handle = 6; runtime.paused = true; runtime.debug_step_request = true; - runtime.telemetry_current.cycles_used = 123; + runtime.atomic_telemetry.cycles_used.store(123, Ordering::Relaxed); let bad_program = serialized_single_function_module( assemble("PUSH_I32 0\nHOSTCALL 0\nHALT").expect("assemble"), @@ -356,7 +354,7 @@ fn initialize_vm_failure_clears_previous_identity_and_handles() { assert_eq!(runtime.next_handle, 1); assert!(!runtime.paused); assert!(!runtime.debug_step_request); - assert_eq!(runtime.telemetry_current.cycles_used, 0); + assert_eq!(runtime.atomic_telemetry.cycles_used.load(Ordering::Relaxed), 0); assert!(matches!(runtime.last_crash_report, Some(CrashReport::VmInit { .. 
}))); } diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs index a879f514..2ded2022 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs @@ -63,15 +63,17 @@ impl VirtualMachineRuntime { self.needs_prepare_entry_call = false; } - self.telemetry_current = TelemetryFrame { - frame_index: self.logical_frame_index, - cycles_budget: self - .certifier + self.atomic_telemetry.frame_index.store(self.logical_frame_index, Ordering::Relaxed); + self.atomic_telemetry.cycles_budget.store( + self.certifier .config .cycles_budget_per_frame .unwrap_or(Self::CYCLES_PER_LOGICAL_FRAME), - ..Default::default() - }; + Ordering::Relaxed, + ); + self.atomic_telemetry.cycles_used.store(0, Ordering::Relaxed); + self.atomic_telemetry.syscalls.store(0, Ordering::Relaxed); + self.atomic_telemetry.vm_steps.store(0, Ordering::Relaxed); } let budget = std::cmp::min(Self::SLICE_PER_TICK, self.logical_frame_remaining_cycles); @@ -86,8 +88,9 @@ impl VirtualMachineRuntime { Ok(run) => { self.logical_frame_remaining_cycles = self.logical_frame_remaining_cycles.saturating_sub(run.cycles_used); - self.telemetry_current.cycles_used += run.cycles_used; - self.telemetry_current.vm_steps += run.steps_executed; + + self.atomic_telemetry.cycles_used.fetch_add(run.cycles_used, Ordering::Relaxed); + self.atomic_telemetry.vm_steps.fetch_add(run.steps_executed, Ordering::Relaxed); if run.reason == LogicalFrameEndingReason::Breakpoint { self.paused = true; @@ -130,37 +133,33 @@ impl VirtualMachineRuntime { { hw.gfx_mut().render_all(); - // 1. Snapshot full telemetry at logical frame end (O(1) with atomic counters) + // 1. 
Snapshot full telemetry at logical frame end let gfx_stats = hw.assets().bank_info(BankType::GLYPH); - self.telemetry_current.gfx_used_bytes = gfx_stats.used_bytes; - self.telemetry_current.gfx_inflight_bytes = gfx_stats.inflight_bytes; - self.telemetry_current.gfx_slots_occupied = gfx_stats.slots_occupied as u32; + self.atomic_telemetry.gfx_used_bytes.store(gfx_stats.used_bytes, Ordering::Relaxed); + self.atomic_telemetry.gfx_inflight_bytes.store(gfx_stats.inflight_bytes, Ordering::Relaxed); + self.atomic_telemetry.gfx_slots_occupied.store(gfx_stats.slots_occupied as u32, Ordering::Relaxed); let audio_stats = hw.assets().bank_info(BankType::SOUNDS); - self.telemetry_current.audio_used_bytes = audio_stats.used_bytes; - self.telemetry_current.audio_inflight_bytes = audio_stats.inflight_bytes; - self.telemetry_current.audio_slots_occupied = - audio_stats.slots_occupied as u32; + self.atomic_telemetry.audio_used_bytes.store(audio_stats.used_bytes, Ordering::Relaxed); + self.atomic_telemetry.audio_inflight_bytes.store(audio_stats.inflight_bytes, Ordering::Relaxed); + self.atomic_telemetry.audio_slots_occupied.store(audio_stats.slots_occupied as u32, Ordering::Relaxed); - self.telemetry_current.heap_used_bytes = - vm.heap().used_bytes.load(Ordering::Relaxed); - self.telemetry_current.heap_max_bytes = 0; // Not yet capped - - self.telemetry_current.logs_count = self.log_service.logs_count; - self.log_service.reset_count(); - - self.telemetry_current.host_cpu_time_us = - start.elapsed().as_micros() as u64; + self.atomic_telemetry.heap_used_bytes.store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed); + self.atomic_telemetry.host_cpu_time_us.store(start.elapsed().as_micros() as u64, Ordering::Relaxed); let ts_ms = self.boot_time.elapsed().as_millis() as u64; - self.telemetry_current.violations = self.certifier.evaluate( - &self.telemetry_current, + let telemetry_snapshot = self.atomic_telemetry.snapshot(); + + let violations = self.certifier.evaluate( + 
&telemetry_snapshot, &mut self.log_service, ts_ms, ) as u32; - self.telemetry_current.completed_logical_frames += 1; - self.telemetry_last = self.telemetry_current; + self.atomic_telemetry.violations.store(violations, Ordering::Relaxed); + self.atomic_telemetry.completed_logical_frames.fetch_add(1, Ordering::Relaxed); + + self.log_service.reset_count(); self.logical_frame_index += 1; self.logical_frame_active = false; @@ -190,33 +189,19 @@ impl VirtualMachineRuntime { // 2. High-frequency telemetry update (only if inspection is active) if self.inspection_active { let gfx_stats = hw.assets().bank_info(BankType::GLYPH); - self.telemetry_current.gfx_used_bytes = gfx_stats.used_bytes; - self.telemetry_current.gfx_inflight_bytes = gfx_stats.inflight_bytes; - self.telemetry_current.gfx_slots_occupied = gfx_stats.slots_occupied as u32; + self.atomic_telemetry.gfx_used_bytes.store(gfx_stats.used_bytes, Ordering::Relaxed); + self.atomic_telemetry.gfx_inflight_bytes.store(gfx_stats.inflight_bytes, Ordering::Relaxed); + self.atomic_telemetry.gfx_slots_occupied.store(gfx_stats.slots_occupied as u32, Ordering::Relaxed); let audio_stats = hw.assets().bank_info(BankType::SOUNDS); - self.telemetry_current.audio_used_bytes = audio_stats.used_bytes; - self.telemetry_current.audio_inflight_bytes = audio_stats.inflight_bytes; - self.telemetry_current.audio_slots_occupied = audio_stats.slots_occupied as u32; + self.atomic_telemetry.audio_used_bytes.store(audio_stats.used_bytes, Ordering::Relaxed); + self.atomic_telemetry.audio_inflight_bytes.store(audio_stats.inflight_bytes, Ordering::Relaxed); + self.atomic_telemetry.audio_slots_occupied.store(audio_stats.slots_occupied as u32, Ordering::Relaxed); - self.telemetry_current.heap_used_bytes = vm.heap().used_bytes.load(Ordering::Relaxed); - self.telemetry_current.logs_count = self.log_service.logs_count; - } + self.atomic_telemetry.heap_used_bytes.store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed); - if 
!self.logical_frame_active - && self.telemetry_last.frame_index == self.logical_frame_index.wrapping_sub(1) - { - self.telemetry_last.host_cpu_time_us = self.last_frame_cpu_time_us; - self.telemetry_last.cycles_budget = self.telemetry_current.cycles_budget; - self.telemetry_last.gfx_used_bytes = self.telemetry_current.gfx_used_bytes; - self.telemetry_last.gfx_inflight_bytes = self.telemetry_current.gfx_inflight_bytes; - self.telemetry_last.gfx_slots_occupied = self.telemetry_current.gfx_slots_occupied; - self.telemetry_last.audio_used_bytes = self.telemetry_current.audio_used_bytes; - self.telemetry_last.audio_inflight_bytes = self.telemetry_current.audio_inflight_bytes; - self.telemetry_last.audio_slots_occupied = self.telemetry_current.audio_slots_occupied; - self.telemetry_last.heap_used_bytes = self.telemetry_current.heap_used_bytes; - self.telemetry_last.heap_max_bytes = self.telemetry_current.heap_max_bytes; - self.telemetry_last.logs_count = self.telemetry_current.logs_count; + self.atomic_telemetry.frame_index.store(self.logical_frame_index, Ordering::Relaxed); + self.atomic_telemetry.host_cpu_time_us.store(start.elapsed().as_micros() as u64, Ordering::Relaxed); } None diff --git a/crates/host/prometeu-host-desktop-winit/src/debugger.rs b/crates/host/prometeu-host-desktop-winit/src/debugger.rs index 0f3bb1c2..abf0cdaf 100644 --- a/crates/host/prometeu-host-desktop-winit/src/debugger.rs +++ b/crates/host/prometeu-host-desktop-winit/src/debugger.rs @@ -253,7 +253,7 @@ impl HostDebugger { // Map Certification tags (0xCA01-0xCA03) to 'Cert' protocol events. if event.tag >= 0xCA01 && event.tag <= 0xCA03 { - let tel = &firmware.os.telemetry_last; + let tel = firmware.os.atomic_telemetry.snapshot(); let cert_config = &firmware.os.certifier.config; let (rule, used, limit) = match event.tag { @@ -293,7 +293,7 @@ impl HostDebugger { // 2. Send telemetry snapshots at the completion of every frame. 
let current_frame = firmware.os.logical_frame_index; if current_frame > self.last_telemetry_frame { - let tel = &firmware.os.telemetry_last; + let tel = firmware.os.atomic_telemetry.snapshot(); self.send_event(DebugEvent::Telemetry { frame_index: tel.frame_index, vm_steps: tel.vm_steps, diff --git a/crates/host/prometeu-host-desktop-winit/src/runner.rs b/crates/host/prometeu-host-desktop-winit/src/runner.rs index 81f86527..b5bbdf6c 100644 --- a/crates/host/prometeu-host-desktop-winit/src/runner.rs +++ b/crates/host/prometeu-host-desktop-winit/src/runner.rs @@ -124,7 +124,7 @@ impl HostRunner { } fn display_dbg_overlay(&mut self) { - let tel = &self.firmware.os.telemetry_last; + let tel = self.firmware.os.atomic_telemetry.snapshot(); let color_text = Color::WHITE; let color_bg = Color::INDIGO; // Dark blue to stand out let color_warn = Color::RED; @@ -195,18 +195,18 @@ impl HostRunner { ); self.hardware.gfx.draw_text(10, 90, &format!("LOGS: {}", tel.logs_count), color_text); - let cert_color = if tel.violations > 0 { color_warn } else { color_text }; - self.hardware.gfx.draw_text(10, 98, &format!("CERT LAST: {}", tel.violations), cert_color); + // The snapshot's `violations` field only reflects the last certified logical frame (stored at logical end of frame) + // For visual debug, count the recent certification tags (0xCA01..=0xCA07) in the logs instead + let recent_logs = self.firmware.os.log_service.get_recent(10); + let violations_count = recent_logs.iter().filter(|e| e.tag >= 0xCA01 && e.tag <= 0xCA07).count(); + let cert_color = if violations_count > 0 { color_warn } else { color_text }; + self.hardware.gfx.draw_text(10, 98, &format!("CERT RECENT: {}", violations_count), cert_color); - if tel.violations > 0 - && let Some(event) = self - .firmware - .os - .log_service - .get_recent(10) + if violations_count > 0 + && let Some(event) = recent_logs .into_iter() .rev() - .find(|e| e.tag >= 0xCA01 && e.tag <= 0xCA03) + .find(|e| e.tag >= 0xCA01 && e.tag <= 0xCA07) { let mut msg = event.msg.clone(); if
msg.len() > 30 { diff --git a/crates/host/prometeu-host-desktop-winit/src/stats.rs b/crates/host/prometeu-host-desktop-winit/src/stats.rs index 03685a10..3c723442 100644 --- a/crates/host/prometeu-host-desktop-winit/src/stats.rs +++ b/crates/host/prometeu-host-desktop-winit/src/stats.rs @@ -68,7 +68,7 @@ impl HostStats { cpu_load_audio, firmware.os.tick_index, firmware.os.logical_frame_index, - firmware.os.telemetry_last.completed_logical_frames, + firmware.os.atomic_telemetry.completed_logical_frames.load(std::sync::atomic::Ordering::Relaxed), ); window.set_title(&title); } diff --git a/discussion/index.ndjson b/discussion/index.ndjson index 36feb88d..d4c853ef 100644 --- a/discussion/index.ndjson +++ b/discussion/index.ndjson @@ -1,4 +1,5 @@ -{"type":"meta","next_id":{"DSC":23,"AGD":21,"DEC":7,"PLN":6,"LSN":27,"CLSN":1}} +{"type":"meta","next_id":{"DSC":24,"AGD":22,"DEC":9,"PLN":8,"LSN":29,"CLSN":1}} +{"type":"discussion","id":"DSC-0023","status":"done","ticket":"perf-full-migration-to-atomic-telemetry","title":"Agenda - [PERF] Full Migration to Atomic Telemetry","created_at":"2026-04-10","updated_at":"2026-04-10","tags":["perf","runtime","telemetry"],"agendas":[{"id":"AGD-0021","file":"workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0008","file":"workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0007","file":"workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0028","file":"lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} 
{"type":"discussion","id":"DSC-0020","status":"done","ticket":"jenkins-gitea-integration","title":"Jenkins Gitea Integration and Relocation","created_at":"2026-04-07","updated_at":"2026-04-07","tags":["ci","jenkins","gitea"],"agendas":[{"id":"AGD-0018","file":"workflow/agendas/AGD-0018-jenkins-gitea-integration-and-relocation.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"decisions":[{"id":"DEC-0003","file":"workflow/decisions/DEC-0003-jenkins-gitea-strategy.md","status":"accepted","created_at":"2026-04-07","updated_at":"2026-04-07"}],"plans":[{"id":"PLN-0003","file":"workflow/plans/PLN-0003-jenkins-gitea-execution.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"lessons":[{"id":"LSN-0021","file":"lessons/DSC-0020-jenkins-gitea-integration/LSN-0021-jenkins-gitea-integration.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}]} {"type":"discussion","id":"DSC-0021","status":"done","ticket":"asset-entry-codec-enum-with-metadata","title":"Asset Entry Codec Enum Contract","created_at":"2026-04-09","updated_at":"2026-04-09","tags":["asset","runtime","codec","metadata"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0024","file":"lessons/DSC-0021-asset-entry-codec-enum-contract/LSN-0024-string-on-the-wire-enum-in-runtime.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} {"type":"discussion","id":"DSC-0022","status":"done","ticket":"tile-bank-vs-glyph-bank-domain-naming","title":"Glyph Bank Domain Naming Contract","created_at":"2026-04-09","updated_at":"2026-04-10","tags":["gfx","runtime","naming","domain-model"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0025","file":"lessons/DSC-0022-glyph-bank-domain-naming/LSN-0025-rename-artifact-by-meaning-not-by-token.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} @@ -14,7 +15,7 @@ 
{"type":"discussion","id":"DSC-0010","status":"open","ticket":"perf-host-desktop-frame-pacing-and-presentation","title":"Agenda - [PERF] Host Desktop Frame Pacing and Presentation","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0009","file":"workflow/agendas/AGD-0009-perf-host-desktop-frame-pacing-and-presentation.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0011","status":"open","ticket":"perf-gfx-render-pipeline-and-dirty-regions","title":"Agenda - [PERF] GFX Render Pipeline and Dirty Regions","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0010","file":"workflow/agendas/AGD-0010-perf-gfx-render-pipeline-and-dirty-regions.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0012","status":"open","ticket":"perf-runtime-introspection-syscalls","title":"Agenda - [PERF] Runtime Introspection Syscalls","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0011","file":"workflow/agendas/AGD-0011-perf-runtime-introspection-syscalls.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} -{"type":"discussion","id":"DSC-0013","status":"open","ticket":"perf-host-debug-overlay-isolation","title":"Agenda - [PERF] Host Debug Overlay Isolation","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0012","file":"workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} +{"type":"discussion","id":"DSC-0013","status":"done","ticket":"perf-host-debug-overlay-isolation","title":"Agenda - [PERF] Host Debug Overlay 
Isolation","created_at":"2026-03-27","updated_at":"2026-04-10","tags":[],"agendas":[{"id":"AGD-0012","file":"workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0007","file":"workflow/decisions/DEC-0007-perf-host-debug-overlay-isolation.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0006","file":"workflow/plans/PLN-0006-perf-host-debug-overlay-isolation.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0027","file":"lessons/DSC-0013-perf-host-debug-overlay-isolation/LSN-0027-host-debug-overlay-isolation.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} {"type":"discussion","id":"DSC-0014","status":"open","ticket":"perf-vm-allocation-and-copy-pressure","title":"Agenda - [PERF] VM Allocation and Copy Pressure","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0013","file":"workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0015","status":"open","ticket":"perf-cartridge-boot-and-program-ownership","title":"Agenda - [PERF] Cartridge Boot and Program Ownership","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0014","file":"workflow/agendas/AGD-0014-perf-cartridge-boot-and-program-ownership.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0016","status":"done","ticket":"tilemap-empty-cell-vs-tile-id-zero","title":"Tilemap Empty Cell vs Tile ID 
Zero","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0015","file":"workflow/agendas/AGD-0015-tilemap-empty-cell-vs-tile-id-zero.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0022","file":"lessons/DSC-0016-tilemap-empty-cell-semantics/LSN-0022-tilemap-empty-cell-convergence.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} diff --git a/discussion/lessons/DSC-0013-perf-host-debug-overlay-isolation/LSN-0027-host-debug-overlay-isolation.md b/discussion/lessons/DSC-0013-perf-host-debug-overlay-isolation/LSN-0027-host-debug-overlay-isolation.md new file mode 100644 index 00000000..7efa01fc --- /dev/null +++ b/discussion/lessons/DSC-0013-perf-host-debug-overlay-isolation/LSN-0027-host-debug-overlay-isolation.md @@ -0,0 +1,39 @@ +--- +id: LSN-0027 +ticket: perf-host-debug-overlay-isolation +title: Host Debug Overlay Isolation +created: 2026-04-10 +tags: [performance, host, gfx, telemetry] +--- + +# Host Debug Overlay Isolation + +The PROMETEU debug overlay (HUD) was decoupled from the emulated machine pipeline and moved to the Host layer to ensure measurement purity and architectural separation. + +## The Original Problem + +The debug overlay used to be rendered by injecting pixels directly into the emulated GFX pipeline during the logical frame execution. This caused several issues: +- **Performance Distortion:** Cycle measurements for certification included the overhead of formatting technical strings and performing extra draw calls. +- **Leaky Abstraction:** The emulated machine became aware of Host-only inspection needs. +- **GFX Coupling:** The HUD was "burned" into the emulated framebuffer, making it impossible to capture raw game frames without the overlay while technical debugging was active. + +## The Solution: Host-Side Rendering with Atomic Telemetry + +The implemented solution follows a strictly non-intrusive approach: + +1. 
**Atomic Telemetry (Push-based):** A new `AtomicTelemetry` structure was added to the HAL. It uses `AtomicU64`, `AtomicU32`, and `AtomicUsize` to track metrics (Cycles, Memory, Logs) in real time. +2. **Runtime Decoupling:** The `VirtualMachineRuntime` updates these atomic counters during its `tick` loop; the per-tick refresh of bank and heap usage runs only if `inspection_active` is enabled. It does not perform any rendering or string formatting. +3. **Host-Side HUD:** The `HostRunner` (in `prometeu-host-desktop-winit`) now takes a `snapshot()` of the atomic telemetry and renders the HUD as a native layer after the emulated machine has finished its work for the tick. + +## Impact and Benefits + +- **Zero Machine Overhead:** Rendering the HUD consumes Host CPU/GPU cycles but does not affect the emulated machine's cycle counter or logical behavior. +- **Fidelity:** The emulated framebuffer remains pure, containing only game pixels. +- **Responsive Telemetry:** By using atomics, the Host can read the most recent metrics at any time without waiting for frame boundaries or acquiring heavy read-locks on the runtime state. +- **Platform Agnosticism:** Non-desktop hosts (which do not need the overlay) do not pay any implementation cost or performance penalty for the HUD's existence. + +## Lessons Learned + +- **Decouple Data from View:** Even for internal debugging tools, keeping the data collection (Runtime) separate from the visualization (Host) is crucial for accurate profiling. +- **Atomic Snapshots are Sufficient:** For high-frequency HUD updates, eventual consistency via relaxed atomic loads is more than enough and significantly more performant than synchronizing via Mutexes or logical frame boundaries. +- **Late Composition:** Composition of technical layers should always happen at the latest possible stage of the display pipeline to avoid polluting the core simulation state.
diff --git a/discussion/lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md b/discussion/lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md new file mode 100644 index 00000000..0f7f8438 --- /dev/null +++ b/discussion/lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md @@ -0,0 +1,23 @@ +# LSN-0028: Converging to a Single Atomic Telemetry Source + +## Context +Initial implementation of the Host Debug Overlay (DEC-0007) maintained legacy fields (`telemetry_current`, `telemetry_last`) alongside the new `AtomicTelemetry` for safety and backward compatibility. This resulted in redundant code and synchronization complexity in the core VM loop. + +## Problem +Maintaining two sources of truth for telemetry (frame-based and atomic-based) is a form of technical debt. It requires updating both systems, increases memory footprint in the `VirtualMachineRuntime`, and creates ambiguity about which data is more "accurate" or "current." + +## Lesson +1. **Atomics are sufficient:** A well-designed atomic structure with a `snapshot()` method can fulfill all needs, from real-time high-frequency inspection to deterministic frame-end certification. +2. **Push-based over Pull-based:** By having the VM "push" updates to atomic counters, the Host can consume them at its own pace without ever locking the execution thread. +3. **Purity through snapshots:** For processes that require a stable view of a frame (like Certification), capturing an atomic snapshot at the exact logical end of the frame is as precise as maintaining a separate buffered structure. + +## Impact +- **Simpler Code:** Removal of legacy fields reduced the complexity of `tick.rs` and `lifecycle.rs`. +- **Better Performance:** Avoids redundant data copies and struct initializations per frame. 
+- **Architectural Clarity:** All diagnostic tools (HUD, Debugger, CLI, Certifier) now converge on the same data source. + +## References +- DSC-0023 ([PERF] Full Migration to Atomic Telemetry) +- DEC-0008 (Full Migration Decision) +- PLN-0007 (Migration Plan) +- DEC-0007 (Overlay Isolation) diff --git a/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md b/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md index 8bb4ab88..31d9739d 100644 --- a/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md +++ b/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md @@ -2,72 +2,48 @@ id: AGD-0012 ticket: perf-host-debug-overlay-isolation title: Agenda - [PERF] Host Debug Overlay Isolation -status: open +status: done created: 2026-03-27 -resolved: -decision: -tags: [] +resolved: 2026-04-10 +decision: DEC-0007 +tags: [performance, host, gfx] --- # Agenda - [PERF] Host Debug Overlay Isolation +## Contexto +O overlay de debug é uma ferramenta exclusiva para o ambiente **Desktop** (`prometeu-host-desktop-winit`). Ele visa fornecer telemetria em tempo real para desenvolvedores sem impactar a fidelidade da emulação ou o desempenho medido do hardware final (handhelds/consoles de baixo custo), onde este overlay não existirá. + ## Problema +Atualmente, o overlay de debug está indevidamente acoplado ao pipeline de `gfx` emulado e ao processamento do runtime. +- **Distorção de Performance:** O custo de renderizar o HUD técnico (formatação de strings e draw calls extras) é contabilizado como custo do jogo. +- **Acoplamento de Pipeline:** O pipeline de `gfx` precisa processar elementos que não pertencem à lógica da máquina virtual. +- **Hotspots:** `runner.rs` realiza `present()` extra e manipulação de texto no loop principal. -O overlay de debug ainda usa o pipeline emulado de `gfx` e injeta custo visual no caminho normal do host. 
+## Pontos Críticos +- **Fato:** O overlay é uma necessidade de desenvolvimento Desktop, não uma funcionalidade da máquina `prometeu`. +- **Risco:** Qualquer processamento de overlay dentro do runtime `prometeu` invalida a pureza dos ciclos medidos para certificação. +- **Tradeoff:** Mover o overlay para o Host exige acesso assíncrono ou passivo aos dados de telemetria. +- **Hipótese:** Um overlay 100% nativo no Host (Winit/Pixels) usando fontes TrueType terá custo desprezível e legibilidade superior. -Hoje o host formata strings por frame, desenha texto via `gfx` e faz `present()` extra para sobrepor telemetria. +## Opções +- **Opção A (Recomendada):** Camada Host Nativa. O `HostRunner` renderiza o HUD em uma surface separada ou faz um *compositing* nativo após o upscaling do framebuffer do console. +- **Opção B:** Overlay via IPC/Sidecar. Ferramenta externa de inspeção (descartada por complexidade visual). +- **Opção C:** Manter no `gfx` emulado com otimização (descartada por não resolver o acoplamento). -## Dor +## Sugestão / Recomendação +1. **Agnosticismo de GFX:** O overlay deve ser tratado como uma "película" transparente aplicada pelo Host Desktop sobre o resultado final da renderização. +2. **Isolamento de Processamento:** Nenhuma instrução de desenho ou formatação de strings do overlay deve ocorrer dentro do runtime. +3. **Acesso via API:** O Host acessará os dados de telemetria através de uma API dedicada (baseada no modelo push-based da `DEC-0005`). +4. **Interface de Controle:** O acionamento permanece via tecla **F1** como um *toggle*, gerenciado pela camada de Host. +5. **Composição via Host:** Utilizar bibliotecas nativas do Host para renderizar o HUD com fontes TrueType nítidas e Alpha Blending real. -- debug ligado altera custo do render path que deveria estar sendo medido. -- overlay de desenvolvimento distorce a leitura de performance do console. -- handheld barato nao deveria pagar composicao de HUD tecnico no mesmo pipeline do jogo. 
+## Perguntas em Aberto +- Nenhuma. As questões sobre acesso via API e acionamento via F1 foram resolvidas durante a discussão. -## Hotspots Atuais - -- [runner.rs](/Users/niltonconstantino/personal/workspace.personal/intrepid/prometeu/runtime/crates/host/prometeu-host-desktop-winit/src/runner.rs#L126) -- [runner.rs](/Users/niltonconstantino/personal/workspace.personal/intrepid/prometeu/runtime/crates/host/prometeu-host-desktop-winit/src/runner.rs#L381) - -## Alvo da Discussao - -Isolar o overlay de debug do custo medido do console sem perder utilidade para desenvolvimento. - -## O Que Precisa Ser Definido - -1. Lugar de composicao. - Decidir se o overlay: - - continua no `gfx` emulado; - - sobe para camada host nativa; - - vira surface separada de debug. - -2. Politica de strings/glyphs. - Definir se texto e reconstruido por frame ou cacheado. - -3. Custo em modo debug. - Delimitar qual overhead e aceitavel quando overlay estiver ativo. - -4. Efeito na telemetria. - Fechar se a telemetria deve incluir ou excluir explicitamente o custo do overlay. - -## Open Questions de Arquitetura - -1. O overlay precisa ser representativo do hardware final ou apenas ferramenta de desktop? - Não, como é HUD técnico, pode e deve ser renderizado pelo Host nativo para melhor legibilidade. -2. Vale um modo "perf puro" onde overlay nunca toca no framebuffer do console? - Sim. O isolamento garante que o `gfx` emulado esteja 100% livre para o jogo durante a medição. -3. O host deve oferecer toggles separados para stats, logs e overlay visual? - Sim. O `HostRunner` deve expor controles granulares via `inspection_active`. -4. Como melhorar a legibilidade e estética (Glyphs/Transparência)? - Migrar a renderização do HUD para o Host Nativo (Winit/Pixels), permitindo o uso de fontes TrueType (monospaced) nítidas e Alpha Blending real para transparência no fundo do painel. 
- -## Dependencias - -- `../specs/10-debug-inspection-and-profiling.md` -- `../specs/11-portability-and-cross-platform-execution.md` - -## Sugestao / Recomendacao - -1. **Migração para Camada Host Nativa:** Renderizar o HUD de debug em uma surface separada ou via pipeline nativo do Host (depois do upscaling do framebuffer do console). -2. **Fontes TrueType (Mono):** Substituir os glyphs bitmapped rudimentares por uma fonte nativa de alta qualidade e nítida. -3. **Composição Alpha:** Permitir fundo semi-transparente para o overlay para não bloquear a visão do jogo. -4. **Acionamento Explícito:** Host deve gerenciar `inspection_active: true` no runtime apenas quando o HUD ou Debugger estiverem ativos. +## Critério para Encerrar +A agenda é considerada encerrada quando: +- Houver consenso sobre o isolamento total do pipeline de `gfx`. +- O método de acesso aos dados (API) estiver definido. +- O controle de interface (F1) estiver estabelecido. +*(Critérios atingidos em 2026-04-10)* diff --git a/discussion/workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md b/discussion/workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md new file mode 100644 index 00000000..1f2e2c6a --- /dev/null +++ b/discussion/workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md @@ -0,0 +1,31 @@ +# AGD-0021: Full Migration to Atomic Telemetry + +## Contexto +Durante a implementação do isolamento do overlay de debug no Host Desktop (DEC-0007), foi introduzido o `AtomicTelemetry` para permitir acesso assíncrono e sem locks aos dados de performance. Por motivos de cautela inicial, os campos legados `telemetry_current` e `telemetry_last` foram mantidos no `VirtualMachineRuntime` para compatibilidade com processos de certificação e logs internos. 
+ +## Problema +A manutenção de dois sistemas paralelos de telemetria gera redundância de código, aumenta a superfície de erro e consome ciclos de CPU desnecessários para atualizar dados que já estão disponíveis de forma mais eficiente via atômicos. A "compatibilidade" pretendida não justifica o débito técnico de manter estruturas duplicadas. + +## Pontos Críticos +- **Fato:** `AtomicTelemetry` já provê todos os dados necessários (ciclos, memória, logs). +- **Risco:** Remoção de `telemetry_last` pode quebrar ferramentas que dependem de snapshots estáticos por frame se não houver um substituto claro de snapshot via atômicos. +- **Tradeoff:** A migração exige refatorar o `VirtualMachineRuntime` e possivelmente o `LogService` para convergirem em uma única fonte de verdade. +- **Hipótese:** Um snapshot derivado do `AtomicTelemetry` ao final de cada frame é suficiente para substituir o `telemetry_last` legado sem perda de precisão. + +## Opções +- **Opção A (Recomendada):** Migração total e remoção dos campos legados. O `AtomicTelemetry` torna-se a única fonte de verdade. Onde snapshots estáveis são necessários, eles são extraídos via `AtomicTelemetry::snapshot()`. +- **Opção B:** Manter redundância (Descartada pelo usuário). + +## Sugestão / Recomendação +1. Remover `telemetry_current` e `telemetry_last` do `VirtualMachineRuntime`. +2. Refatorar o loop de execução para atualizar exclusivamente o `AtomicTelemetry`. +3. Garantir que o `LogService` e outras auditorias consumam dados do novo modelo. +4. Atualizar as especificações para refletir o modelo único. + +## Perguntas em Aberto +1. Existe algum uso específico de `telemetry_last` em ferramentas externas (não mapeadas) que dependem do layout de memória antigo? (Assumimos que não para este escopo). + +## Critério para Encerrar +- Remoção completa dos campos no código. +- Compilação e execução bem-sucedidas do Host Desktop com o novo modelo único. +- Atualização da documentação normativa. 
diff --git a/discussion/workflow/decisions/DEC-0007-perf-host-debug-overlay-isolation.md b/discussion/workflow/decisions/DEC-0007-perf-host-debug-overlay-isolation.md new file mode 100644 index 00000000..9906c13b --- /dev/null +++ b/discussion/workflow/decisions/DEC-0007-perf-host-debug-overlay-isolation.md @@ -0,0 +1,53 @@ +--- +id: DEC-0007 +ticket: perf-host-debug-overlay-isolation +title: Decision - [PERF] Host Debug Overlay Isolation +status: accepted +created: 2026-04-10 +updated: 2026-04-10 +agenda: AGD-0012 +tags: [performance, host, gfx] +--- + +# Decision - [PERF] Host Debug Overlay Isolation + +## Status +**Accepted** + +## Contexto +O overlay de debug do PROMETEU, utilizado para exibir telemetria, logs e métricas em tempo real, está atualmente acoplado ao pipeline de renderização emulado (`gfx`) e ao processamento do runtime. Isso resulta em: +1. **Distorção de Performance:** O custo de formatar strings e realizar chamadas de desenho para o HUD técnico é contabilizado como ciclos do jogo, invalidando medições de certificação. +2. **Acoplamento Indevido:** O pipeline `gfx` emulado processa elementos visuais (HUD) que não existem no hardware original. +3. **Complexidade no Host:** O Host precisa lidar com a injeção dessas informações no buffer de emulação. + +## Decisao +Fica decidido que o overlay de debug será movido integralmente para a camada **Host Desktop** (`prometeu-host-desktop-winit`), operando de forma 100% isolada do runtime e do pipeline de `gfx` emulado. + +1. **Isolamento de Pipeline:** O overlay técnico deve ser tratado como uma "película" (layer) transparente aplicada pelo Host sobre o resultado final da renderização (pós-upscaling). +2. **Isolamento de Processamento:** Nenhuma instrução de desenho ou formatação de strings relacionada ao overlay deve ocorrer dentro do runtime ou durante os ciclos emulados. +3. 
**Acesso via API:** O Host acessará os dados necessários para o overlay através de uma API de telemetria passiva (baseada em atômicos, conforme definido na `DEC-0005`). +4. **Acionamento via F1:** O controle de visibilidade (toggle) do overlay é de responsabilidade exclusiva do Host, acionado pela tecla **F1**. + +## Rationale +- **Pureza de Medição:** Ao remover o processamento do overlay do runtime, garantimos que 100% dos ciclos medidos pertencem à lógica do cartucho e periféricos emulados. +- **Desempenho no Hardware Alvo:** Como o overlay é exclusivo de Desktop, dispositivos handheld/consoles de baixo custo não devem pagar o preço de implementação ou ramificações lógicas para lidar com HUD técnico. +- **Qualidade Visual:** Utilizar o Host para renderizar o HUD permite o uso de fontes TrueType nítidas e Alpha Blending real, melhorando drasticamente a legibilidade para o desenvolvedor sem afetar a fidelidade da emulação. + +## Invariantes / Contrato +- **Zero Overhead no Runtime:** A presença ou ausência do overlay não deve alterar o contador de ciclos consumidos por frame no runtime. +- **Agnosticismo de GFX:** O framebuffer emulado deve conter apenas os pixels gerados pelo cartucho, sem HUD técnico "queimado" na imagem. +- **Composição Assíncrona/Passiva:** O Host não deve bloquear o runtime para coletar dados para o overlay; a leitura deve ser feita a partir de buffers de telemetria já expostos. + +## Impactos +- **Host (Desktop Winit):** Exige a implementação de uma camada de renderização nativa (ex: `egui` ou composição direta via `pixels`/`winit`) que suporte transparência. +- **Runtime API:** Deve expor campos de telemetria (FPS, Memory Usage, Cycles) via API para consumo pelo Host. +- **Especificações:** Atualização dos capítulos de Portabilidade e Debug para refletir o isolamento. + +## Referencias +- `AGD-0012`: [PERF] Host Debug Overlay Isolation (Agenda de Origem). +- `DEC-0005`: [PERF] Push-based Telemetry Model (Modelo de acesso aos dados). 
+ +## Propagacao Necessaria +1. Remover hotspots de desenho de texto no `runner.rs` do host. +2. Implementar o novo sistema de HUD no host usando bibliotecas nativas. +3. Atualizar a documentação técnica em `docs/specs/runtime/`. diff --git a/discussion/workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md b/discussion/workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md new file mode 100644 index 00000000..22bdabc1 --- /dev/null +++ b/discussion/workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md @@ -0,0 +1,35 @@ +# DEC-0008: Full Migration to Atomic Telemetry + +## Status +Accepted + +## Contexto +Após o sucesso inicial do `AtomicTelemetry` (DEC-0007), identificou-se que a coexistência com os campos legados `telemetry_current` e `telemetry_last` no `VirtualMachineRuntime` é contraproducente. O usuário solicitou a remoção completa da camada de compatibilidade para simplificar o motor e garantir que o modelo atômico seja a única fonte de verdade para performance e inspeção. + +## Decisao +1. **Remoção Total:** Eliminar os campos `telemetry_current` e `telemetry_last` da struct `VirtualMachineRuntime`. +2. **Modelo Único:** O `AtomicTelemetry` passa a ser a única estrutura responsável por rastrear métricas de execução (ciclos, memória, logs). +3. **Snapshot On-Demand:** Qualquer necessidade de telemetria estática (ex: para logs de erro ou final de frame) deve ser atendida pelo método `AtomicTelemetry::snapshot()`, que gera um `TelemetryFrame` imutável a partir do estado atômico atual. +4. **Atualização Condicional:** A atualização dos campos atômicos dentro do loop de `tick` da VM permanece protegida por `inspection_active`, garantindo overhead zero em modo de produção. + +## Rationale +- **Simplicidade:** Reduz o número de campos no `VirtualMachineRuntime`. +- **Performance:** Evita a cópia de dados entre `telemetry_current` e `telemetry_last` ao final de cada frame. 
+- **Consistência:** Garante que o Host e o Runtime vejam os mesmos dados através da mesma API. + +## Invariantes / Contrato +- O `VirtualMachineRuntime` deve possuir uma instância de `Arc<AtomicTelemetry>`. +- O `AtomicTelemetry` deve ser thread-safe (já garantido pelo uso de `std::sync::atomic`). + +## Impactos +- **Runtime:** Alteração na struct principal e no loop de tick. +- **HAL:** Possível ajuste no `LogService` se ele dependia diretamente dos campos legados. +- **Docs:** Necessidade de atualizar as especificações de Debug e Portabilidade. + +## Referencias +- AGD-0021 (Migration Agenda) +- DEC-0007 (Overlay Isolation Decision) + +## Propagacao Necessaria +- Refatoração de `VirtualMachineRuntime`. +- Atualização de `prometeu-host-desktop-winit` para garantir que continua funcionando (já migrado no PLN-0006, mas deve ser validado). diff --git a/discussion/workflow/plans/PLN-0006-perf-host-debug-overlay-isolation.md b/discussion/workflow/plans/PLN-0006-perf-host-debug-overlay-isolation.md new file mode 100644 index 00000000..771f7440 --- /dev/null +++ b/discussion/workflow/plans/PLN-0006-perf-host-debug-overlay-isolation.md @@ -0,0 +1,64 @@ +--- +id: PLN-0006 +ticket: perf-host-debug-overlay-isolation +title: PR/Plan - [PERF] Host Debug Overlay Isolation +status: open +created: 2026-04-10 +updated: 2026-04-10 +decisions: [DEC-0007] +tags: [performance, host, gfx] +--- + +# PR/Plan - [PERF] Host Debug Overlay Isolation + +## Briefing +Implementação do isolamento total do overlay de debug no Host Desktop (`prometeu-host-desktop-winit`), removendo o acoplamento com o runtime e o pipeline de `gfx` emulado. + +## Decisions de Origem +- `DEC-0007`: [PERF] Host Debug Overlay Isolation. +- `DEC-0005`: [PERF] Push-based Telemetry Model (base para extração de dados). + +## Alvo +- `crates/host/prometeu-host-desktop-winit`: Implementação da camada nativa de HUD. +- `crates/runtime`: Exposição de campos de telemetria via API. 
+- `docs/specs/runtime`: Atualização das especificações de debug e portabilidade. + +## Escopo +- **Spec Work:** + - Atualizar `docs/specs/runtime/10-debug-inspection-and-profiling.md` para remover menções ao HUD emulado. + - Atualizar `docs/specs/runtime/11-portability-and-cross-platform-execution.md` para reforçar a separação de responsabilidades (Host-side HUD). +- **Code Work:** + - Expansão da API de telemetria no runtime para incluir todos os dados necessários (Cycles, Memory, Logs). + - Remoção do código de desenho de texto legado no `runner.rs`. + - Integração de biblioteca nativa (ex: `egui` ou composição via `pixels`) para renderização do novo overlay no Host. + - Implementação do toggle via tecla **F1** no `HostRunner`. + +## Fora de Escopo +- Implementação de overlay visual em outros hosts (mobile, handheld). +- Alterações na lógica de emulação central (loop de execução). + +## Plano de Execucao +1. **Fase 1: Especificações (Spec)** + - Revisar e atualizar os arquivos de especificação (`10-debug` e `11-portability`). +2. **Fase 2: Runtime Telemetry API (Code)** + - Garantir que todos os campos de telemetria estejam expostos via atômicos/push-based conforme `DEC-0005`. +3. **Fase 3: Host HUD Implementation (Code)** + - Integrar o novo motor de HUD no `prometeu-host-desktop-winit`. + - Conectar os dados da API de telemetria à visualização do HUD. +4. **Fase 4: Cleanup (Code)** + - Remover hotspots de formatação de strings e draw calls do overlay antigo no Host. + +## Criterios de Aceite +- O overlay de debug é ativado/desativado via tecla **F1**. +- O overlay utiliza fontes TrueType (monospaced) nítidas e fundo semi-transparente. +- O framebuffer emulado não contém pixels do HUD (composição nativa pós-upscaling). +- O custo de ciclos do runtime é idêntico com o overlay ligado ou desligado. + +## Tests / Validacao +- **Verificação Visual:** Confirmar a qualidade das fontes e a transparência do novo HUD. 
+- **Benchmarking:** Comparar os ciclos consumidos por frame com e sem o HUD ativo para provar isolamento. +- **Teste de Regressão:** Garantir que o F1 toggle não afeta a estabilidade do loop de emulação. + +## Riscos +- **Overhead no Host:** A renderização nativa (ex: `egui`) pode introduzir overhead no Host Desktop em máquinas muito fracas (geralmente aceitável em Desktop). +- **Sincronização de Telemetria:** Pequeno atraso visual entre o frame renderizado e os dados exibidos se a coleta for puramente assíncrona (aceitável para telemetria de debug). diff --git a/discussion/workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md b/discussion/workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md new file mode 100644 index 00000000..f9e3228b --- /dev/null +++ b/discussion/workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md @@ -0,0 +1,52 @@ +# PLN-0007: Full Migration to Atomic Telemetry + +## Briefing +Este plano detalha a remoção técnica dos campos legados de telemetria no `VirtualMachineRuntime` e a migração de todos os consumidores para o modelo de `AtomicTelemetry` introduzido na DEC-0007. + +## Decisions de Origem +- DEC-0008 (Full Migration to Atomic Telemetry) +- DEC-0007 (Host Debug Overlay Isolation) + +## Alvo +- `crates/console/prometeu-system` +- `crates/console/prometeu-hal` +- `crates/host/prometeu-host-desktop-winit` + +## Escopo +- Remoção de `telemetry_current` e `telemetry_last` de `VirtualMachineRuntime`. +- Refatoração do `VirtualMachineRuntime::tick` para remover atualizações redundantes. +- Atualização do `VirtualMachineRuntime::lifecycle` para remover inicialização e reset dos campos legados. +- Refatoração do `LogService` para consumir logs via `AtomicTelemetry`. +- Atualização do `HostRunner` (Desktop) para remover qualquer referência residual aos campos legados. +- Atualização das especificações técnicas em `docs/specs/runtime/`. 
+ +## Fora de Escopo +- Mudanças no formato do `TelemetryFrame` (a menos que estritamente necessário para compatibilidade). +- Otimizações de performance não relacionadas à telemetria. + +## Plano de Execucao +1. **Fase 1: HAL & Telemetry** + - Verificar se `AtomicTelemetry` possui todos os campos necessários. + - Garantir que `LogService` está alinhado com o novo modelo. +2. **Fase 2: Runtime Refactor** + - Remover campos de `VirtualMachineRuntime` em `mod.rs`. + - Limpar inicialização em `lifecycle.rs`. + - Limpar loop de atualização em `tick.rs`. +3. **Fase 3: Host & Integration** + - Corrigir chamadas no `HostRunner` que ainda usem os campos antigos. + - Validar que o snapshot atômico atende às necessidades de inspeção. +4. **Fase 4: Specs & Cleanup** + - Atualizar `10-debug` e `11-portability`. + - Emitir lição aprendida LSN-0028. + +## Criterios de Aceite +- O projeto compila sem warnings relacionados a campos não utilizados. +- O Host Desktop inicia e o overlay (F1) exibe telemetria correta via atômicos. +- Não existem mais os campos `telemetry_current` e `telemetry_last` no código fonte. + +## Tests / Validacao +- `cargo check` em todos os crates afetados. +- Execução manual do host desktop para validar overlay. + +## Riscos +- **Perda de Snapshot de Frame:** Se o `snapshot()` não for chamado no momento certo ao final do frame, o overlay pode mostrar valores parciais de ciclos (resolvido chamando `snapshot()` no Host no momento de renderização do overlay). diff --git a/docs/specs/runtime/10-debug-inspection-and-profiling.md b/docs/specs/runtime/10-debug-inspection-and-profiling.md index 45159504..9137364a 100644 --- a/docs/specs/runtime/10-debug-inspection-and-profiling.md +++ b/docs/specs/runtime/10-debug-inspection-and-profiling.md @@ -18,7 +18,8 @@ It covers: - profiling; - breakpoints and watchpoints; - event and fault visibility; -- certification-facing diagnostics. +- certification-facing diagnostics; +- Host-side debug overlay (HUD) isolation. 
## 2 Execution Modes @@ -230,10 +231,34 @@ Each event has: - cost - consequence -## 10 Integration with CAP and Certification +## 10 Host-Side Debug Overlay (HUD) Isolation + +The visual Debug Overlay (HUD) for technical inspection is not part of the emulated machine pipeline. + +### 10.1 Responsibilities + +1. **Runtime:** Only exposes telemetry data via the machine diagnostics surface. It does not perform HUD rendering or string formatting. +2. **Host (Desktop):** Responsible for collecting telemetry from the runtime and rendering the HUD as a native, transparent layer. + +### 10.2 Principles + +- **Zero Pipeline Interference:** HUD rendering must not inject pixels into the emulated framebuffer. It is applied after upscaling or as a separate display surface. +- **Zero Cycle Impact:** HUD-related processing (like formatting technical text) must occur outside the emulated machine cycles. +- **Toggle Control:** The activation of the overlay (typically via **F1**) is managed by the Host layer. + +### 10.3 Atomic Telemetry Model + +To ensure zero-impact synchronization between the VM and the Host Debug Overlay, PROMETEU uses a **push-based atomic model**: + +1. **Atomic Storage:** Metrics such as cycles, syscalls, and memory usage are stored in a dedicated `AtomicTelemetry` structure using thread-safe atomic types (`AtomicU64`, `AtomicU32`, etc.). +2. **Lockless Access:** The Host (Desktop) reads these metrics asynchronously and without locks by taking a `snapshot()` of the atomic state. +3. **Single Source of Truth:** This model is the exclusive source of truth for both real-time inspection and frame-end certification, replacing legacy per-frame buffered fields. + +## 11 Integration with CAP and Certification All debug and profiling data: - feed the certification report - are collected deterministically - do not depend on external tools +- are consistent regardless of whether the Host HUD is active or not. 
diff --git a/docs/specs/runtime/11-portability-and-cross-platform-execution.md b/docs/specs/runtime/11-portability-and-cross-platform-execution.md index 58383bbc..ccbfae77 100644 --- a/docs/specs/runtime/11-portability-and-cross-platform-execution.md +++ b/docs/specs/runtime/11-portability-and-cross-platform-execution.md @@ -51,6 +51,7 @@ The contract is about logical behavior, not identical physical latency or throug - audio output - physical input collection - access to the sandbox file system +- **technical inspection surfaces (Debug Overlay/HUD)** The host provides realization of machine surfaces. It does not redefine cartridge semantics. @@ -123,8 +124,25 @@ The platform layer: - only displays the framebuffer - does not reinterpret graphics commands +- **may overlay technical HUDs without modifying the logical framebuffer** -## 9 File System and Persistence +## 9 Debug and Inspection Isolation + +To preserve portability and certification purity, technical inspection tools (like the Debug Overlay) are moved to the Host layer. + +- **Host-exclusive:** These tools are only implemented where they are relevant (e.g., Desktop) and do not exist in the logical machine. +- **Non-intrusive:** They must not consume machine cycles or alter memory state. +- **Consistent Results:** A cartridge will produce the same logical results and certification metrics regardless of the Host's inspection capabilities. + +### 9.1 Atomic Telemetry Interface + +Inspection is facilitated by a lockless, push-based atomic interface: + +1. **Host-Independent:** The VM updates the atomic counters during its tick loop whenever inspection is active (`inspection_active`), regardless of which Host is observing them. +2. **Asynchronous Observation:** The Host layer reads snapshots of these counters at its own display frequency. +3. **Loop Purity:** This ensures that the VM execution loop remains deterministic and free from synchronization overhead (locks) that could vary across host architectures. 
+ +## 10 File System and Persistence PROMETEU defines a **sandbox logical filesystem**: @@ -140,7 +158,7 @@ The platform maps this filesystem to: Without changing semantics. -## 10 Certification and Portability +## 11 Certification and Portability The **PROMETEU Certification** is valid for all platforms. @@ -154,7 +172,7 @@ It: - will pass on all - will produce the same reports -## 11 What PROMETEU Does Not Guarantee +## 12 What PROMETEU Does Not Guarantee PROMETEU **does not promise**: