diff --git a/crates/console/prometeu-drivers/src/asset.rs b/crates/console/prometeu-drivers/src/asset.rs index 468f6bd6..ac0f1169 100644 --- a/crates/console/prometeu-drivers/src/asset.rs +++ b/crates/console/prometeu-drivers/src/asset.rs @@ -12,6 +12,7 @@ use prometeu_hal::sample::Sample; use prometeu_hal::sound_bank::SoundBank; use std::collections::HashMap; use std::io::Read; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex, RwLock}; use std::thread; use std::time::Instant; @@ -53,10 +54,26 @@ impl ResidentEntry { /// This is internal to the AssetManager and not visible to peripherals. pub struct BankPolicy { /// Dedup table: asset_id -> resident entry (value + telemetry). - resident: Arc>>>, + pub resident: Arc>>>, /// Staging area: handle -> value ready to commit. - staging: Arc>>>, + pub staging: Arc, usize)>>>, + + /// Total bytes currently in resident storage. + pub used_bytes: Arc, + /// Bytes in staging awaiting commit. + pub inflight_bytes: Arc, +} + +impl Clone for BankPolicy { + fn clone(&self) -> Self { + Self { + resident: Arc::clone(&self.resident), + staging: Arc::clone(&self.staging), + used_bytes: Arc::clone(&self.used_bytes), + inflight_bytes: Arc::clone(&self.inflight_bytes), + } + } } impl BankPolicy { @@ -64,6 +81,8 @@ impl BankPolicy { Self { resident: Arc::new(RwLock::new(HashMap::new())), staging: Arc::new(RwLock::new(HashMap::new())), + used_bytes: Arc::new(AtomicUsize::new(0)), + inflight_bytes: Arc::new(AtomicUsize::new(0)), } } @@ -87,24 +106,32 @@ impl BankPolicy { None => { let entry = ResidentEntry::new(Arc::clone(&value), bytes); map.insert(asset_id, entry); + self.used_bytes.fetch_add(bytes, Ordering::Relaxed); value } } } /// Place a value into staging for a given handle. 
- pub fn stage(&self, handle: HandleId, value: Arc) { - self.staging.write().unwrap().insert(handle, value); + pub fn stage(&self, handle: HandleId, value: Arc, bytes: usize) { + self.staging.write().unwrap().insert(handle, (value, bytes)); + self.inflight_bytes.fetch_add(bytes, Ordering::Relaxed); } /// Take staged value (used by commit path). - pub fn take_staging(&self, handle: HandleId) -> Option> { - self.staging.write().unwrap().remove(&handle) + pub fn take_staging(&self, handle: HandleId) -> Option<(Arc, usize)> { + let entry = self.staging.write().unwrap().remove(&handle); + if let Some((_, bytes)) = entry.as_ref() { + self.inflight_bytes.fetch_sub(*bytes, Ordering::Relaxed); + } + entry } pub fn clear(&self) { self.resident.write().unwrap().clear(); self.staging.write().unwrap().clear(); + self.used_bytes.store(0, Ordering::Relaxed); + self.inflight_bytes.store(0, Ordering::Relaxed); } } @@ -127,6 +154,11 @@ pub struct AssetManager { /// Residency policy for sound banks. sound_policy: BankPolicy, + /// Count of occupied slots for GFX. + gfx_slots_occupied: AtomicUsize, + /// Count of occupied slots for sounds. + sound_slots_occupied: AtomicUsize, + // Commits that are ready to be applied at the next frame boundary. 
pending_commits: Mutex>, } @@ -263,6 +295,8 @@ impl AssetManager { sound_slots: Arc::new(RwLock::new(std::array::from_fn(|_| None))), gfx_policy: BankPolicy::new(), sound_policy: BankPolicy::new(), + gfx_slots_occupied: AtomicUsize::new(0), + sound_slots_occupied: AtomicUsize::new(0), handles: Arc::new(RwLock::new(HashMap::new())), next_handle_id: Mutex::new(1), assets_data: Arc::new(RwLock::new(assets_data)), @@ -379,7 +413,7 @@ impl AssetManager { let already_resident = match entry.bank_type { BankType::GLYPH => { if let Some(bank) = self.gfx_policy.get_resident(asset_id) { - self.gfx_policy.stage(handle_id, bank); + self.gfx_policy.stage(handle_id, bank, entry.decoded_size as usize); true } else { false @@ -387,7 +421,7 @@ impl AssetManager { } BankType::SOUNDS => { if let Some(bank) = self.sound_policy.get_resident(asset_id) { - self.sound_policy.stage(handle_id, bank); + self.sound_policy.stage(handle_id, bank, entry.decoded_size as usize); true } else { false @@ -414,10 +448,8 @@ impl AssetManager { let entry_clone = entry.clone(); // Capture policies for the worker thread - let gfx_policy_resident = Arc::clone(&self.gfx_policy.resident); - let gfx_policy_staging = Arc::clone(&self.gfx_policy.staging); - let sound_policy_resident = Arc::clone(&self.sound_policy.resident); - let sound_policy_staging = Arc::clone(&self.sound_policy.staging); + let gfx_policy = self.gfx_policy.clone(); + let sound_policy = self.sound_policy.clone(); thread::spawn(move || { // Update status to LOADING @@ -439,22 +471,13 @@ impl AssetManager { let result = Self::perform_load_glyph_bank(&entry_clone, assets_data); if let Ok(tilebank) = result { let bank_arc = Arc::new(tilebank); - let resident_arc = { - let mut map = gfx_policy_resident.write().unwrap(); - if let Some(existing) = map.get_mut(&asset_id) { - existing.last_used = Instant::now(); - existing.loads += 1; - Arc::clone(&existing.value) - } else { - let entry = ResidentEntry::new( - Arc::clone(&bank_arc), - 
entry_clone.decoded_size as usize, - ); - map.insert(asset_id, entry); - bank_arc - } - }; - gfx_policy_staging.write().unwrap().insert(handle_id, resident_arc); + let resident_arc = gfx_policy.put_resident( + asset_id, + bank_arc, + entry_clone.decoded_size as usize, + ); + gfx_policy.stage(handle_id, resident_arc, entry_clone.decoded_size as usize); + let mut handles_map = handles.write().unwrap(); if let Some(h) = handles_map.get_mut(&handle_id) { if h.status == LoadStatus::LOADING { @@ -472,22 +495,14 @@ impl AssetManager { let result = Self::perform_load_sound_bank(&entry_clone, assets_data); if let Ok(soundbank) = result { let bank_arc = Arc::new(soundbank); - let resident_arc = { - let mut map = sound_policy_resident.write().unwrap(); - if let Some(existing) = map.get_mut(&asset_id) { - existing.last_used = Instant::now(); - existing.loads += 1; - Arc::clone(&existing.value) - } else { - let entry = ResidentEntry::new( - Arc::clone(&bank_arc), - entry_clone.decoded_size as usize, - ); - map.insert(asset_id, entry); - bank_arc - } - }; - sound_policy_staging.write().unwrap().insert(handle_id, resident_arc); + let resident_arc = sound_policy.put_resident( + asset_id, + bank_arc, + entry_clone.decoded_size as usize, + ); + sound_policy + .stage(handle_id, resident_arc, entry_clone.decoded_size as usize); + let mut handles_map = handles.write().unwrap(); if let Some(h) = handles_map.get_mut(&handle_id) { if h.status == LoadStatus::LOADING { @@ -699,20 +714,26 @@ impl AssetManager { if h.status == LoadStatus::READY { match h.slot.asset_type { BankType::GLYPH => { - if let Some(bank) = self.gfx_policy.take_staging(handle_id) { + if let Some((bank, _)) = self.gfx_policy.take_staging(handle_id) { self.gfx_installer.install_glyph_bank(h.slot.index, bank); let mut slots = self.gfx_slots.write().unwrap(); if h.slot.index < slots.len() { + if slots[h.slot.index].is_none() { + self.gfx_slots_occupied.fetch_add(1, Ordering::Relaxed); + } slots[h.slot.index] = 
Some(h._asset_id); } h.status = LoadStatus::COMMITTED; } } BankType::SOUNDS => { - if let Some(bank) = self.sound_policy.take_staging(handle_id) { + if let Some((bank, _)) = self.sound_policy.take_staging(handle_id) { self.sound_installer.install_sound_bank(h.slot.index, bank); let mut slots = self.sound_slots.write().unwrap(); if h.slot.index < slots.len() { + if slots[h.slot.index].is_none() { + self.sound_slots_occupied.fetch_add(1, Ordering::Relaxed); + } slots[h.slot.index] = Some(h._asset_id); } h.status = LoadStatus::COMMITTED; @@ -727,38 +748,9 @@ impl AssetManager { pub fn bank_info(&self, kind: BankType) -> BankStats { match kind { BankType::GLYPH => { - let mut used_bytes = 0; - { - let resident = self.gfx_policy.resident.read().unwrap(); - for entry in resident.values() { - used_bytes += entry.bytes; - } - } - - let mut inflight_bytes = 0; - { - let staging = self.gfx_policy.staging.read().unwrap(); - let assets = self.assets.read().unwrap(); - let handles = self.handles.read().unwrap(); - - for (handle_id, _) in staging.iter() { - if let Some(h) = handles.get(handle_id) { - if let Some(entry) = assets.get(&h._asset_id) { - inflight_bytes += entry.decoded_size as usize; - } - } - } - } - - let mut slots_occupied = 0; - { - let slots = self.gfx_slots.read().unwrap(); - for s in slots.iter() { - if s.is_some() { - slots_occupied += 1; - } - } - } + let used_bytes = self.gfx_policy.used_bytes.load(Ordering::Relaxed); + let inflight_bytes = self.gfx_policy.inflight_bytes.load(Ordering::Relaxed); + let slots_occupied = self.gfx_slots_occupied.load(Ordering::Relaxed); BankStats { total_bytes: 16 * 1024 * 1024, @@ -770,38 +762,9 @@ impl AssetManager { } } BankType::SOUNDS => { - let mut used_bytes = 0; - { - let resident = self.sound_policy.resident.read().unwrap(); - for entry in resident.values() { - used_bytes += entry.bytes; - } - } - - let mut inflight_bytes = 0; - { - let staging = self.sound_policy.staging.read().unwrap(); - let assets = 
self.assets.read().unwrap(); - let handles = self.handles.read().unwrap(); - - for (handle_id, _) in staging.iter() { - if let Some(h) = handles.get(handle_id) { - if let Some(entry) = assets.get(&h._asset_id) { - inflight_bytes += entry.decoded_size as usize; - } - } - } - } - - let mut slots_occupied = 0; - { - let slots = self.sound_slots.read().unwrap(); - for s in slots.iter() { - if s.is_some() { - slots_occupied += 1; - } - } - } + let used_bytes = self.sound_policy.used_bytes.load(Ordering::Relaxed); + let inflight_bytes = self.sound_policy.inflight_bytes.load(Ordering::Relaxed); + let slots_occupied = self.sound_slots_occupied.load(Ordering::Relaxed); BankStats { total_bytes: 32 * 1024 * 1024, @@ -865,6 +828,8 @@ impl AssetManager { pub fn shutdown(&self) { self.gfx_policy.clear(); self.sound_policy.clear(); + self.gfx_slots_occupied.store(0, Ordering::Relaxed); + self.sound_slots_occupied.store(0, Ordering::Relaxed); self.handles.write().unwrap().clear(); self.pending_commits.lock().unwrap().clear(); self.gfx_slots.write().unwrap().fill(None); @@ -1049,8 +1014,8 @@ mod tests { assert_eq!(am.status(handle2), LoadStatus::READY); let staging = am.gfx_policy.staging.read().unwrap(); - let bank1 = staging.get(&handle1).unwrap(); - let bank2 = staging.get(&handle2).unwrap(); + let bank1 = &staging.get(&handle1).unwrap().0; + let bank2 = &staging.get(&handle2).unwrap().0; assert!(Arc::ptr_eq(bank1, bank2)); } @@ -1208,4 +1173,60 @@ mod tests { assert_eq!(am.status(handle), LoadStatus::CANCELED); assert_eq!(am.commit(handle), AssetOpStatus::InvalidState); } + + #[test] + fn test_asset_telemetry_incremental() { + let banks = Arc::new(MemoryBanks::new()); + let gfx_installer = Arc::clone(&banks) as Arc; + let sound_installer = Arc::clone(&banks) as Arc; + + let width = 16; + let height = 16; + let decoded_bytes = expected_glyph_decoded_size(width, height); + let data = test_glyph_asset_data(); + + let am = AssetManager::new( + 
vec![test_glyph_asset_entry("test_glyphs", width, height)], + AssetsPayloadSource::from_bytes(data), + gfx_installer, + sound_installer, + ); + + // Initially zero + let info = am.bank_info(BankType::GLYPH); + assert_eq!(info.used_bytes, 0); + assert_eq!(info.inflight_bytes, 0); + assert_eq!(info.slots_occupied, 0); + + // Loading + let handle = am.load(0, 0).expect("load must allocate handle"); + + // While LOADING or READY, it should be in inflight_bytes + let start = Instant::now(); + while am.status(handle) != LoadStatus::READY && start.elapsed().as_secs() < 5 { + thread::sleep(std::time::Duration::from_millis(10)); + } + + let info = am.bank_info(BankType::GLYPH); + // Note: put_resident happens in worker thread, then stage happens. + assert_eq!(info.used_bytes, decoded_bytes); + assert_eq!(info.inflight_bytes, decoded_bytes); + assert_eq!(info.slots_occupied, 0); + + // Commit + am.commit(handle); + am.apply_commits(); + + let info = am.bank_info(BankType::GLYPH); + assert_eq!(info.used_bytes, decoded_bytes); + assert_eq!(info.inflight_bytes, 0); + assert_eq!(info.slots_occupied, 1); + + // Shutdown resets + am.shutdown(); + let info = am.bank_info(BankType::GLYPH); + assert_eq!(info.used_bytes, 0); + assert_eq!(info.inflight_bytes, 0); + assert_eq!(info.slots_occupied, 0); + } } diff --git a/crates/console/prometeu-hal/src/log/log_service.rs b/crates/console/prometeu-hal/src/log/log_service.rs index 0d8f015c..e6d991e6 100644 --- a/crates/console/prometeu-hal/src/log/log_service.rs +++ b/crates/console/prometeu-hal/src/log/log_service.rs @@ -5,11 +5,17 @@ pub struct LogService { events: VecDeque, capacity: usize, next_seq: u64, + pub logs_count: u32, } impl LogService { pub fn new(capacity: usize) -> Self { - Self { events: VecDeque::with_capacity(capacity), capacity, next_seq: 0 } + Self { + events: VecDeque::with_capacity(capacity), + capacity, + next_seq: 0, + logs_count: 0, + } } pub fn log( @@ -34,6 +40,11 @@ impl LogService { msg, }); self.next_seq += 
1; + self.logs_count += 1; + } + + pub fn reset_count(&mut self) { + self.logs_count = 0; } pub fn get_recent(&self, n: usize) -> Vec { diff --git a/crates/console/prometeu-hal/src/telemetry.rs b/crates/console/prometeu-hal/src/telemetry.rs index 9540d0a3..361cf11c 100644 --- a/crates/console/prometeu-hal/src/telemetry.rs +++ b/crates/console/prometeu-hal/src/telemetry.rs @@ -20,6 +20,13 @@ pub struct TelemetryFrame { pub audio_used_bytes: usize, pub audio_inflight_bytes: usize, pub audio_slots_occupied: u32, + + // RAM (Heap) + pub heap_used_bytes: usize, + pub heap_max_bytes: usize, + + // Log Pressure + pub logs_count: u32, } #[derive(Debug, Clone, Copy, Default)] @@ -28,6 +35,10 @@ pub struct CertificationConfig { pub cycles_budget_per_frame: Option, pub max_syscalls_per_frame: Option, pub max_host_cpu_us_per_frame: Option, + pub max_gfx_bytes: Option, + pub max_audio_bytes: Option, + pub max_heap_bytes: Option, + pub max_logs_per_frame: Option, } pub struct Certifier { @@ -51,6 +62,7 @@ impl Certifier { let mut violations = 0; + // 1. Cycles if let Some(budget) = self.config.cycles_budget_per_frame && telemetry.cycles_used > budget { @@ -68,6 +80,7 @@ impl Certifier { violations += 1; } + // 2. Syscalls if let Some(limit) = self.config.max_syscalls_per_frame && telemetry.syscalls > limit { @@ -85,6 +98,7 @@ impl Certifier { violations += 1; } + // 3. CPU Time if let Some(limit) = self.config.max_host_cpu_us_per_frame && telemetry.host_cpu_time_us > limit { @@ -102,6 +116,78 @@ impl Certifier { violations += 1; } + // 4. GFX Memory + if let Some(limit) = self.config.max_gfx_bytes + && telemetry.gfx_used_bytes > limit + { + log_service.log( + ts_ms, + telemetry.frame_index, + LogLevel::Warn, + LogSource::Pos, + 0xCA04, + format!( + "Cert: GFX bank exceeded memory limit ({} > {})", + telemetry.gfx_used_bytes, limit + ), + ); + violations += 1; + } + + // 5. 
Audio Memory + if let Some(limit) = self.config.max_audio_bytes + && telemetry.audio_used_bytes > limit + { + log_service.log( + ts_ms, + telemetry.frame_index, + LogLevel::Warn, + LogSource::Pos, + 0xCA05, + format!( + "Cert: Audio bank exceeded memory limit ({} > {})", + telemetry.audio_used_bytes, limit + ), + ); + violations += 1; + } + + // 6. Heap Memory + if let Some(limit) = self.config.max_heap_bytes + && telemetry.heap_used_bytes > limit + { + log_service.log( + ts_ms, + telemetry.frame_index, + LogLevel::Warn, + LogSource::Pos, + 0xCA06, + format!( + "Cert: Heap memory exceeded limit ({} > {})", + telemetry.heap_used_bytes, limit + ), + ); + violations += 1; + } + + // 7. Log Pressure + if let Some(limit) = self.config.max_logs_per_frame + && telemetry.logs_count > limit + { + log_service.log( + ts_ms, + telemetry.frame_index, + LogLevel::Warn, + LogSource::Pos, + 0xCA07, + format!( + "Cert: Log pressure exceeded limit ({} > {})", + telemetry.logs_count, limit + ), + ); + violations += 1; + } + violations } } @@ -118,6 +204,8 @@ mod tests { cycles_budget_per_frame: Some(100), max_syscalls_per_frame: Some(5), max_host_cpu_us_per_frame: Some(1000), + max_gfx_bytes: Some(1024), + ..Default::default() }; let cert = Certifier::new(config); let mut ls = LogService::new(10); @@ -126,13 +214,15 @@ mod tests { tel.cycles_used = 150; tel.syscalls = 10; tel.host_cpu_time_us = 500; + tel.gfx_used_bytes = 2048; let violations = cert.evaluate(&tel, &mut ls, 1000); - assert_eq!(violations, 2); + assert_eq!(violations, 3); let logs = ls.get_recent(10); - assert_eq!(logs.len(), 2); + assert_eq!(logs.len(), 3); assert!(logs[0].msg.contains("cycles_used")); assert!(logs[1].msg.contains("syscalls")); + assert!(logs[2].msg.contains("GFX bank")); } } diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime.rs b/crates/console/prometeu-system/src/virtual_machine_runtime.rs index ea1a0bc7..a4bfa41e 100644 --- 
a/crates/console/prometeu-system/src/virtual_machine_runtime.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime.rs @@ -37,13 +37,14 @@ pub struct VirtualMachineRuntime { pub certifier: Certifier, pub paused: bool, pub debug_step_request: bool, + pub inspection_active: bool, pub(crate) needs_prepare_entry_call: bool, pub(crate) boot_time: Instant, } impl VirtualMachineRuntime { - pub const CYCLES_PER_LOGICAL_FRAME: u64 = 5_000_000; - pub const SLICE_PER_TICK: u64 = 5_000_000; + pub const CYCLES_PER_LOGICAL_FRAME: u64 = 1_500_000; + pub const SLICE_PER_TICK: u64 = 1_500_000; pub const MAX_LOG_LEN: usize = 256; pub const MAX_LOGS_PER_FRAME: u32 = 10; } diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs index 84046860..69184441 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime/lifecycle.rs @@ -30,6 +30,7 @@ impl VirtualMachineRuntime { certifier: Certifier::new(cap_config.unwrap_or_default()), paused: false, debug_step_request: false, + inspection_active: false, needs_prepare_entry_call: false, boot_time, }; @@ -104,6 +105,7 @@ impl VirtualMachineRuntime { self.paused = false; self.debug_step_request = false; + self.inspection_active = false; self.needs_prepare_entry_call = false; } diff --git a/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs b/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs index 40c4f951..9168fae8 100644 --- a/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs +++ b/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs @@ -4,6 +4,7 @@ use prometeu_hal::asset::BankType; use prometeu_hal::log::{LogLevel, LogSource}; use prometeu_hal::{HardwareBridge, HostContext, InputSignals}; use prometeu_vm::LogicalFrameEndingReason; +use std::sync::atomic::Ordering; impl 
VirtualMachineRuntime { pub fn debug_step_instruction( @@ -128,8 +129,27 @@ impl VirtualMachineRuntime { || run.reason == LogicalFrameEndingReason::EndOfRom { hw.gfx_mut().render_all(); - self.telemetry_current.host_cpu_time_us = - start.elapsed().as_micros() as u64; + + // 1. Snapshot full telemetry at logical frame end (O(1) with atomic counters) + let gfx_stats = hw.assets().bank_info(BankType::GLYPH); + self.telemetry_current.gfx_used_bytes = gfx_stats.used_bytes; + self.telemetry_current.gfx_inflight_bytes = gfx_stats.inflight_bytes; + self.telemetry_current.gfx_slots_occupied = gfx_stats.slots_occupied as u32; + + let audio_stats = hw.assets().bank_info(BankType::SOUNDS); + self.telemetry_current.audio_used_bytes = audio_stats.used_bytes; + self.telemetry_current.audio_inflight_bytes = audio_stats.inflight_bytes; + self.telemetry_current.audio_slots_occupied = + audio_stats.slots_occupied as u32; + + self.telemetry_current.heap_used_bytes = + vm.heap().used_bytes.load(Ordering::Relaxed); + self.telemetry_current.heap_max_bytes = 0; // Not yet capped + + self.telemetry_current.logs_count = self.log_service.logs_count; + self.log_service.reset_count(); + + self.telemetry_current.host_cpu_time_us = start.elapsed().as_micros() as u64; let ts_ms = self.boot_time.elapsed().as_millis() as u64; self.telemetry_current.violations = self.certifier.evaluate( @@ -166,15 +186,21 @@ impl VirtualMachineRuntime { self.last_frame_cpu_time_us = start.elapsed().as_micros() as u64; - let gfx_stats = hw.assets().bank_info(BankType::GLYPH); - self.telemetry_current.gfx_used_bytes = gfx_stats.used_bytes; - self.telemetry_current.gfx_inflight_bytes = gfx_stats.inflight_bytes; - self.telemetry_current.gfx_slots_occupied = gfx_stats.slots_occupied as u32; + // 2. 
High-frequency telemetry update (only if inspection is active) + if self.inspection_active { + let gfx_stats = hw.assets().bank_info(BankType::GLYPH); + self.telemetry_current.gfx_used_bytes = gfx_stats.used_bytes; + self.telemetry_current.gfx_inflight_bytes = gfx_stats.inflight_bytes; + self.telemetry_current.gfx_slots_occupied = gfx_stats.slots_occupied as u32; - let audio_stats = hw.assets().bank_info(BankType::SOUNDS); - self.telemetry_current.audio_used_bytes = audio_stats.used_bytes; - self.telemetry_current.audio_inflight_bytes = audio_stats.inflight_bytes; - self.telemetry_current.audio_slots_occupied = audio_stats.slots_occupied as u32; + let audio_stats = hw.assets().bank_info(BankType::SOUNDS); + self.telemetry_current.audio_used_bytes = audio_stats.used_bytes; + self.telemetry_current.audio_inflight_bytes = audio_stats.inflight_bytes; + self.telemetry_current.audio_slots_occupied = audio_stats.slots_occupied as u32; + + self.telemetry_current.heap_used_bytes = vm.heap().used_bytes.load(Ordering::Relaxed); + self.telemetry_current.logs_count = self.log_service.logs_count; + } if !self.logical_frame_active && self.telemetry_last.frame_index == self.logical_frame_index.wrapping_sub(1) @@ -187,6 +213,9 @@ impl VirtualMachineRuntime { self.telemetry_last.audio_used_bytes = self.telemetry_current.audio_used_bytes; self.telemetry_last.audio_inflight_bytes = self.telemetry_current.audio_inflight_bytes; self.telemetry_last.audio_slots_occupied = self.telemetry_current.audio_slots_occupied; + self.telemetry_last.heap_used_bytes = self.telemetry_current.heap_used_bytes; + self.telemetry_last.heap_max_bytes = self.telemetry_current.heap_max_bytes; + self.telemetry_last.logs_count = self.telemetry_current.logs_count; } None diff --git a/crates/console/prometeu-vm/src/heap.rs b/crates/console/prometeu-vm/src/heap.rs index 1476e0e3..bcd4f2dd 100644 --- a/crates/console/prometeu-vm/src/heap.rs +++ b/crates/console/prometeu-vm/src/heap.rs @@ -2,6 +2,9 @@ use 
crate::call_frame::CallFrame; use crate::object::{ObjectHeader, ObjectKind}; use prometeu_bytecode::{HeapRef, Value}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + /// Internal stored object: header plus opaque payload bytes. #[derive(Debug, Clone)] pub struct StoredObject { @@ -22,6 +25,30 @@ pub struct StoredObject { pub coroutine: Option, } +impl StoredObject { + /// Returns the approximate memory footprint of this object in bytes. + pub fn bytes(&self) -> usize { + let mut total = std::mem::size_of::(); + total += self.payload.capacity(); + + if let Some(elems) = &self.array_elems { + total += std::mem::size_of::>(); + total += elems.capacity() * std::mem::size_of::(); + } + if let Some(env) = &self.closure_env { + total += std::mem::size_of::>(); + total += env.capacity() * std::mem::size_of::(); + } + if let Some(coro) = &self.coroutine { + total += std::mem::size_of::(); + total += coro.stack.capacity() * std::mem::size_of::(); + total += coro.frames.capacity() * std::mem::size_of::(); + } + + total + } +} + /// Execution state of a coroutine. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum CoroutineState { @@ -49,14 +76,22 @@ pub struct Heap { objects: Vec>, // Reclaimed slots available for deterministic reuse (LIFO). free_list: Vec, + + /// Total bytes currently used by all objects in the heap. + pub used_bytes: Arc, } impl Heap { pub fn new() -> Self { - Self { objects: Vec::new(), free_list: Vec::new() } + Self { + objects: Vec::new(), + free_list: Vec::new(), + used_bytes: Arc::new(AtomicUsize::new(0)), + } } fn insert_object(&mut self, obj: StoredObject) -> HeapRef { + self.used_bytes.fetch_add(obj.bytes(), Ordering::Relaxed); if let Some(idx) = self.free_list.pop() { debug_assert!(self.objects.get(idx).is_some_and(|slot| slot.is_none())); self.objects[idx] = Some(obj); @@ -363,6 +398,7 @@ impl Heap { obj.header.set_marked(false); } else { // Unreachable: reclaim by dropping and turning into tombstone. 
+ self.used_bytes.fetch_sub(obj.bytes(), Ordering::Relaxed); *slot = None; self.free_list.push(idx); } diff --git a/crates/console/prometeu-vm/src/virtual_machine.rs b/crates/console/prometeu-vm/src/virtual_machine.rs index cc8538da..52bbb659 100644 --- a/crates/console/prometeu-vm/src/virtual_machine.rs +++ b/crates/console/prometeu-vm/src/virtual_machine.rs @@ -140,6 +140,11 @@ impl VirtualMachine { self.operand_stack[start..].iter().rev().cloned().collect() } + /// Returns a reference to the VM's heap. + pub fn heap(&self) -> &Heap { + &self.heap + } + /// Returns true if the VM has executed a HALT and is not currently running. pub fn is_halted(&self) -> bool { self.halted diff --git a/crates/host/prometeu-host-desktop-winit/src/runner.rs b/crates/host/prometeu-host-desktop-winit/src/runner.rs index 643976ee..81f86527 100644 --- a/crates/host/prometeu-host-desktop-winit/src/runner.rs +++ b/crates/host/prometeu-host-desktop-winit/src/runner.rs @@ -129,7 +129,7 @@ impl HostRunner { let color_bg = Color::INDIGO; // Dark blue to stand out let color_warn = Color::RED; - self.hardware.gfx.fill_rect(5, 5, 175, 100, color_bg); + self.hardware.gfx.fill_rect(5, 5, 175, 130, color_bg); self.hardware.gfx.draw_text( 10, 10, @@ -187,8 +187,16 @@ impl HostRunner { ); } + self.hardware.gfx.draw_text( + 10, + 82, + &format!("RAM: {}KB", tel.heap_used_bytes / 1024), + color_text, + ); + self.hardware.gfx.draw_text(10, 90, &format!("LOGS: {}", tel.logs_count), color_text); + let cert_color = if tel.violations > 0 { color_warn } else { color_text }; - self.hardware.gfx.draw_text(10, 82, &format!("CERT LAST: {}", tel.violations), cert_color); + self.hardware.gfx.draw_text(10, 98, &format!("CERT LAST: {}", tel.violations), cert_color); if tel.violations > 0 && let Some(event) = self @@ -204,7 +212,7 @@ impl HostRunner { if msg.len() > 30 { msg.truncate(30); } - self.hardware.gfx.draw_text(10, 90, &msg, color_warn); + self.hardware.gfx.draw_text(10, 106, &msg, color_warn); } if let 
Some(report) = self.firmware.os.last_crash_report.as_ref() { @@ -212,7 +220,7 @@ impl HostRunner { if msg.len() > 30 { msg.truncate(30); } - self.hardware.gfx.draw_text(10, 98, &msg, color_warn); + self.hardware.gfx.draw_text(10, 114, &msg, color_warn); } } } @@ -311,6 +319,9 @@ impl ApplicationHandler for HostRunner { // 1. Process pending debug commands from the network. self.debugger.check_commands(&mut self.firmware, &mut self.hardware); + // Sync inspection mode state. + self.firmware.os.inspection_active = self.overlay_enabled || self.debugger.stream.is_some(); + // 2. Maintain filesystem connection if it was lost (e.g., directory removed). if let Some(root) = &self.fs_root { use prometeu_system::fs::FsState; diff --git a/discussion/index.ndjson b/discussion/index.ndjson index cc0aa485..36feb88d 100644 --- a/discussion/index.ndjson +++ b/discussion/index.ndjson @@ -1,4 +1,4 @@ -{"type":"meta","next_id":{"DSC":23,"AGD":21,"DEC":7,"PLN":6,"LSN":26,"CLSN":1}} +{"type":"meta","next_id":{"DSC":23,"AGD":21,"DEC":7,"PLN":6,"LSN":27,"CLSN":1}} {"type":"discussion","id":"DSC-0020","status":"done","ticket":"jenkins-gitea-integration","title":"Jenkins Gitea Integration and Relocation","created_at":"2026-04-07","updated_at":"2026-04-07","tags":["ci","jenkins","gitea"],"agendas":[{"id":"AGD-0018","file":"workflow/agendas/AGD-0018-jenkins-gitea-integration-and-relocation.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"decisions":[{"id":"DEC-0003","file":"workflow/decisions/DEC-0003-jenkins-gitea-strategy.md","status":"accepted","created_at":"2026-04-07","updated_at":"2026-04-07"}],"plans":[{"id":"PLN-0003","file":"workflow/plans/PLN-0003-jenkins-gitea-execution.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"lessons":[{"id":"LSN-0021","file":"lessons/DSC-0020-jenkins-gitea-integration/LSN-0021-jenkins-gitea-integration.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}]} 
{"type":"discussion","id":"DSC-0021","status":"done","ticket":"asset-entry-codec-enum-with-metadata","title":"Asset Entry Codec Enum Contract","created_at":"2026-04-09","updated_at":"2026-04-09","tags":["asset","runtime","codec","metadata"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0024","file":"lessons/DSC-0021-asset-entry-codec-enum-contract/LSN-0024-string-on-the-wire-enum-in-runtime.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} {"type":"discussion","id":"DSC-0022","status":"done","ticket":"tile-bank-vs-glyph-bank-domain-naming","title":"Glyph Bank Domain Naming Contract","created_at":"2026-04-09","updated_at":"2026-04-10","tags":["gfx","runtime","naming","domain-model"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0025","file":"lessons/DSC-0022-glyph-bank-domain-naming/LSN-0025-rename-artifact-by-meaning-not-by-token.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} @@ -9,7 +9,7 @@ {"type":"discussion","id":"DSC-0005","status":"open","ticket":"system-fault-semantics-and-control-surface","title":"Agenda - System Fault Semantics and Control Surface","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0004","file":"workflow/agendas/AGD-0004-system-fault-semantics-and-control-surface.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0006","status":"open","ticket":"vm-owned-random-service","title":"Agenda - VM-Owned Random Service","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0005","file":"workflow/agendas/AGD-0005-vm-owned-random-service.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0007","status":"open","ticket":"app-home-filesystem-surface-and-semantics","title":"Agenda - App Home Filesystem Surface and 
Semantics","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0006","file":"workflow/agendas/AGD-0006-app-home-filesystem-surface-and-semantics.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} -{"type":"discussion","id":"DSC-0008","status":"open","ticket":"perf-runtime-telemetry-hot-path","title":"Agenda - [PERF] Runtime Telemetry Hot Path","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0007","file":"workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} +{"type":"discussion","id":"DSC-0008","status":"done","ticket":"perf-runtime-telemetry-hot-path","title":"Agenda - [PERF] Runtime Telemetry Hot Path","created_at":"2026-03-27","updated_at":"2026-04-10","tags":[],"agendas":[{"id":"AGD-0007","file":"workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0005","file":"workflow/decisions/DEC-0005-perf-push-based-telemetry-model.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0005","file":"workflow/plans/PLN-0005-perf-push-based-telemetry-implementation.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0026","file":"lessons/DSC-0008-perf-runtime-telemetry-hot-path/LSN-0026-push-based-telemetry-model.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} {"type":"discussion","id":"DSC-0009","status":"open","ticket":"perf-async-background-work-lanes-for-assets-and-fs","title":"Agenda - [PERF] Async Background Work Lanes for Assets and 
FS","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0008","file":"workflow/agendas/AGD-0008-perf-async-background-work-lanes-for-assets-and-fs.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0010","status":"open","ticket":"perf-host-desktop-frame-pacing-and-presentation","title":"Agenda - [PERF] Host Desktop Frame Pacing and Presentation","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0009","file":"workflow/agendas/AGD-0009-perf-host-desktop-frame-pacing-and-presentation.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0011","status":"open","ticket":"perf-gfx-render-pipeline-and-dirty-regions","title":"Agenda - [PERF] GFX Render Pipeline and Dirty Regions","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0010","file":"workflow/agendas/AGD-0010-perf-gfx-render-pipeline-and-dirty-regions.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} diff --git a/discussion/lessons/DSC-0008-perf-runtime-telemetry-hot-path/LSN-0026-push-based-telemetry-model.md b/discussion/lessons/DSC-0008-perf-runtime-telemetry-hot-path/LSN-0026-push-based-telemetry-model.md new file mode 100644 index 00000000..e7abfec2 --- /dev/null +++ b/discussion/lessons/DSC-0008-perf-runtime-telemetry-hot-path/LSN-0026-push-based-telemetry-model.md @@ -0,0 +1,27 @@ +--- +id: LSN-0026 +ticket: perf-runtime-telemetry-hot-path +title: Modelo de Telemetria Push-based +created: 2026-04-10 +tags: [performance, telemetry, atomics] +--- +# Modelo de Telemetria Push-based +O sistema de telemetria do PROMETEU evoluiu de um modelo de varredura sob demanda (pull) para um modelo de contadores incrementais (push), visando minimizar o impacto no *hot path* do 
runtime. +## O Problema Original +Anteriormente, a cada *host tick*, o runtime solicitava informações de uso de memória dos bancos de assets. Isso resultava em: +- Varreduras $O(n)$ sobre mapas de recursos. +- Múltiplas aquisições de *locks* de leitura em cada tick. +- Overhead desnecessário em hardwares handheld, onde cada microssegundo conta. +## A Solução: Modelo Push com Atômicos +A solução implementada utiliza `AtomicUsize` nos drivers e na VM para manter o estado do sistema em tempo real com custo $O(1)$ de leitura e escrita: +1. **Drivers (Assets):** Contadores atômicos em cada `BankPolicy` são atualizados durante `load`, `commit` e `cancel`. +2. **VM (Heap):** Um contador `used_bytes` na struct `Heap` rastreia alocações e liberações (sweep). +3. **System (Logs):** O `LogService` rastreia a pressão de logs emitida em cada frame. +## Dois Níveis de Observabilidade +Para equilibrar performance e depuração, a coleta foi dividida: +- **Snapshot de Frame (Sempre):** Captura automática no fim de cada frame lógico. Custo irrelevante ($O(1)$). Serve ao `Certifier` e ao log histórico. +- **Tick de Host (Sob Demanda):** A coleta detalhada em cada tick só ocorre se `inspection_active` estiver habilitado (ex: Overlay F1 ligado). +## Lições Aprendidas +- **Desacoplamento de Gatilhos:** Não devemos usar o estado do `Certifier` para habilitar funcionalidades de depuração visual (como o overlay), pois eles têm propósitos e custos diferentes. +- **Consistência Eventual é Suficiente:** Para métricas de telemetria, não é necessário travar o sistema para obter um valor exato a cada nanossegundo. A leitura relaxada de atômicos é suficiente e muito mais performática. +- **Isolamento de Custo:** Mover a lógica de agregação para o driver simplifica o runtime e garante que o custo de telemetria seja pago apenas durante mutações de estado, e não repetidamente durante a execução estável. 
diff --git a/discussion/workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md b/discussion/workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md index 98a9edaf..d620c2bb 100644 --- a/discussion/workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md +++ b/discussion/workflow/agendas/AGD-0007-perf-runtime-telemetry-hot-path.md @@ -59,8 +59,30 @@ Remover varredura e agregacao lock-heavy do hot path do tick sem perder observab ## Open Questions de Arquitetura 1. O certifier realmente precisa de snapshot de bank a cada tick? + Não. O certifier aceita dados do fim do frame anterior, pois violações de limites de bank costumam ser persistentes entre frames. 2. O overlay pode ler uma versao resumida da telemetria em vez de recalcular tudo? + Sim. O `AssetManager` passará a prover uma struct `BankStats` pré-calculada via contadores atômicos. 3. Vale manter caminho "preciso" so para testes/debug e caminho "barato" para runtime normal? + Sim, mas a "precisão" será definida como "atualizado no último evento de mutação", o que já é suficiente para ambos os casos. +4. Como detectar o modo de depuração/inspeção de forma correta e desacoplada? + Através de um novo campo `inspection_active: bool` no `VirtualMachineRuntime`, controlado explicitamente pelo Host (ex: quando o Overlay F1 ou o Debugger remoto estão ativos). O `certifier` não deve ser usado para este propósito. + +## Sugestao / Recomendacao + +1. **Modelo de Métrica (Push-based):** + - Migrar de snapshot total $O(n)$ para contadores incrementais $O(1)$ no `AssetManager`. + - Utilizar `AtomicUsize` ou campos protegidos por Mutex simples para `used_bytes`, `inflight_bytes` e `slots_occupied`. + - Atualizar esses contadores apenas em eventos de mutação (`load`, `commit`, `cancel`). + +2. **Frequência de Coleta (Dois Níveis):** + - **Básica (Sempre):** O Runtime deve atualizar `telemetry_current` no fechamento de cada logical frame (`FrameSync` ou `EndOfRom`). 
Isso garante dados para o `certifier` com custo $O(1)$. + - **Alta Frequência (Sob Demanda):** Manter atualização em todo host tick apenas se `inspection_active` for `true` (Overlay F1 visível ou Debugger conectado). + +3. **Responsabilidade da Agregação (Centralizada):** + - O `AssetManager` é o dono da "verdade incremental". O Runtime consome um snapshot barato (struct POD) sem varredura de locks. + +4. **Garantia de Consistência (Eventual):** + - Aceitar defasagem de até 1 frame lógico para métricas de asset bank. ## Dependencias diff --git a/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md b/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md index 747c5bec..8bb4ab88 100644 --- a/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md +++ b/discussion/workflow/agendas/AGD-0012-perf-host-debug-overlay-isolation.md @@ -52,19 +52,22 @@ Isolar o overlay de debug do custo medido do console sem perder utilidade para d ## Open Questions de Arquitetura 1. O overlay precisa ser representativo do hardware final ou apenas ferramenta de desktop? + Não, como é HUD técnico, pode e deve ser renderizado pelo Host nativo para melhor legibilidade. 2. Vale um modo "perf puro" onde overlay nunca toca no framebuffer do console? + Sim. O isolamento garante que o `gfx` emulado esteja 100% livre para o jogo durante a medição. 3. O host deve oferecer toggles separados para stats, logs e overlay visual? + Sim. O `HostRunner` deve expor controles granulares via `inspection_active`. +4. Como melhorar a legibilidade e estética (Glyphs/Transparência)? + Migrar a renderização do HUD para o Host Nativo (Winit/Pixels), permitindo o uso de fontes TrueType (monospaced) nítidas e Alpha Blending real para transparência no fundo do painel. 
## Dependencias - `../specs/10-debug-inspection-and-profiling.md` - `../specs/11-portability-and-cross-platform-execution.md` -## Criterio de Saida Desta Agenda +## Sugestao / Recomendacao -Pode virar PR quando houver decisao escrita sobre: - -- onde o overlay e composto; -- politica de cache de texto/glyphs; -- como o custo do overlay aparece na telemetria; -- overhead maximo aceitavel em modo debug. +1. **Migração para Camada Host Nativa:** Renderizar o HUD de debug em uma surface separada ou via pipeline nativo do Host (depois do upscaling do framebuffer do console). +2. **Fontes TrueType (Mono):** Substituir os glyphs bitmapped rudimentares por uma fonte nativa de alta qualidade e nítida. +3. **Composição Alpha:** Permitir fundo semi-transparente para o overlay para não bloquear a visão do jogo. +4. **Acionamento Explícito:** Host deve gerenciar `inspection_active: true` no runtime apenas quando o HUD ou Debugger estiverem ativos. diff --git a/discussion/workflow/decisions/DEC-0005-perf-push-based-telemetry-model.md b/discussion/workflow/decisions/DEC-0005-perf-push-based-telemetry-model.md new file mode 100644 index 00000000..4399573e --- /dev/null +++ b/discussion/workflow/decisions/DEC-0005-perf-push-based-telemetry-model.md @@ -0,0 +1,57 @@ +--- +id: DEC-0005 +title: "Decisão - [PERF] Modelo de Telemetria Push-based" +status: closed +created: 2026-03-27 +resolved: 2026-03-27 +agenda: AGD-0007 +tags: [] +--- + +# Decisão - [PERF] Modelo de Telemetria Push-based + +## Status + +**Fechada (Closed)** - Consensus reached. Implementation approved. + +## Contexto + +O runtime atual (`VirtualMachineRuntime::tick()`) realiza a coleta de telemetria de asset banks (`gfx` e `audio`) em todos os *host ticks*. Essa coleta envolve a chamada de `bank_info()` no `AssetManager`, que executa uma varredura $O(n)$ sobre mapas de recursos e adquire múltiplos locks de leitura. 
Em hardwares limitados (handhelds), esse custo repetitivo no caminho quente degrada a performance desnecessariamente, mesmo quando o sistema está estável ou em pausa. + +## Decisão + +1. **Modelo de Contadores no Driver e Runtime:** O `AssetManager`, a `Heap` da VM e o `LogService` devem substituir a varredura/contagem total por **contadores atômicos** (`used_bytes`, `inflight_bytes`, `slots_occupied`, `logs_count`) para cada subsistema. Esses contadores serão atualizados incrementalmente ($O(1)$) em cada mutação (load, commit, alloc, free, log). +2. **Snapshot Obrigatório de Fim de Frame:** O runtime capturará o estado desses contadores (Banks, Heap e Logs) **apenas uma vez** por fechamento de frame lógico (`FrameSync` ou `EndOfRom`). Este snapshot será usado para alimentar a `telemetry_last` e o `certifier`. +3. **Coleta sob Demanda (Inspection Mode):** A coleta em cada *host tick* será reativada **somente** se o novo sinalizador `inspection_active: bool` do runtime for verdadeiro. +4. **Desacoplamento do Certifier:** A ativação do `certifier` não habilitará mais a telemetria detalhada em cada tick. O certifier será servido exclusivamente pelos snapshots de fim de frame lógico. + +## Rationale + +* **Performance:** Reduz o custo do tick do runtime de $O(n)$ com locks para $O(1)$ sem locks no modo normal. +* **Observabilidade:** Mantém dados precisos para o overlay (via modo inspeção) e dados válidos para o certifier (via snapshot de frame). +* **Modularidade:** Desacopla as necessidades de depuração (Overlay F1) das necessidades de validação normativa (Certifier). + +## Invariantes / Contrato + +* O `AssetManager` é a única fonte da verdade para o uso de memória de assets; o runtime não deve tentar calcular esses valores manualmente. +* Contadores atômicos garantem que o runtime possa ler estatísticas de bancos sem travar mutações em andamento (consistência eventual). 
+* A defasagem de até 1 frame lógico é aceitável para métricas de asset bank no modo de operação normal. + +## Impactos + +* **Drivers:** Necessidade de adicionar e gerenciar contadores no `AssetManager`. +* **Virtual Machine:** Adicionar contador atômico de `used_bytes` na `Heap`. +* **Log Service:** Adicionar contador incremental de logs emitidos no frame no `LogService`. +* **Runtime:** Modificação no `VirtualMachineRuntime` para incluir o campo `inspection_active` e lógica condicional no `tick()`. +* **Host:** O host (ex: desktop-winit) deve agora sinalizar quando o overlay de depuração está ativo via `inspection_active`. + +## Referências + +* Agenda [AGD-0007-perf-runtime-telemetry-hot-path.md](../agendas/AGD-0007-perf-runtime-telemetry-hot-path.md) +* Spec `10-debug-inspection-and-profiling.md` + +## Propagação Necessária + +* Atualizar o `VirtualMachineRuntime` para expor o campo `inspection_active`. +* Atualizar o `HostRunner` para sinalizar `inspection_active` quando o overlay F1 for alternado. +* Atualizar a struct `TelemetryFrame` para incluir campos de Heap Memory e Log count. diff --git a/discussion/workflow/plans/PLN-0005-perf-push-based-telemetry-implementation.md b/discussion/workflow/plans/PLN-0005-perf-push-based-telemetry-implementation.md new file mode 100644 index 00000000..8cf7b3e2 --- /dev/null +++ b/discussion/workflow/plans/PLN-0005-perf-push-based-telemetry-implementation.md @@ -0,0 +1,81 @@ +--- +id: PLN-0005 +title: "Plano - [PERF] Implementação de Telemetria Push-based" +status: open +created: 2026-03-27 +origin_decisions: + - DEC-0005 +tags: [] +--- + +# Plano - [PERF] Implementação de Telemetria Push-based + +## Briefing + +Este plano detalha as alterações técnicas para migrar o sistema de telemetria de um modelo de varredura $O(n)$ com locks para um modelo push-based com contadores atômicos $O(1)$. O objetivo é reduzir o overhead do hot path do runtime e adicionar visibilidade sobre a memória RAM (Heap) e volume de logs.
+ +## Decisions de Origem + +* [DEC-0005 - [PERF] Modelo de Telemetria Push-based](../decisions/DEC-0005-perf-push-based-telemetry-model.md) + +## Alvo + +* `prometeu-drivers` (AssetManager) +* `prometeu-vm` (Heap) +* `prometeu-hal` (TelemetryFrame) +* `prometeu-system` (LogService, VirtualMachineRuntime) +* `prometeu-host-desktop-winit` (HostRunner) + +## Escopo + +1. **Contadores Atômicos:** Implementação de `AtomicUsize` nos subsistemas de assets, heap e logs. +2. **Telemetry Frame:** Expansão da struct para incluir `heap_used`, `heap_max` e `logs_count`. +3. **Lógica de Tick:** Refatoração do `tick.rs` para usar `inspection_active` e snapshots de fim de frame. +4. **Sinalização do Host:** Integração do campo `inspection_active` com o acionamento do overlay F1. + +## Fora de Escopo + +* Migração da renderização do overlay para o host nativo (será tratada em plano derivado da AGD-0012). +* Telemetria de FileSystem IO ou Corrotinas. + +## Plano de Execucao + +### Fase 1: Drivers e VM (Modelo Push) +1. **`prometeu-drivers/src/asset.rs`:** + - Adicionar contadores em `BankPolicy`. + - Atualizar contadores em `load_internal`, `commit` e `cancel`. + - Refatorar `bank_info()` para retornar os valores atômicos sem varredura. +2. **`prometeu-vm/src/heap.rs`:** + - Adicionar contador `used_bytes` na struct `Heap`. + - Atualizar contador em `alloc()` e `free()`. +3. **`prometeu-system/src/log.rs`:** + - Adicionar contador `logs_count` (resetável por frame) no `LogService`. + +### Fase 2: HAL e Runtime (Contrato e Lógica) +1. **`prometeu-hal/src/telemetry.rs`:** + - Adicionar `heap_used_bytes`, `heap_max_bytes` e `logs_count` na `TelemetryFrame`. +2. **`prometeu-system/src/virtual_machine_runtime.rs`:** + - Adicionar campo `pub inspection_active: bool`. +3. **`prometeu-system/src/virtual_machine_runtime/tick.rs`:** + - Modificar `tick()` para coletar `bank_info` detalhado apenas se `inspection_active == true`.
+ - Implementar a captura do snapshot consolidado no fechamento do logical frame. + +### Fase 3: Host e Integração +1. **`prometeu-host-desktop-winit/src/runner.rs`:** + - Sincronizar o estado do campo `overlay_enabled` com `firmware.os.inspection_active`. + +## Criterios de Aceite + +* O sistema compila sem avisos de tipos inexistentes. +* A telemetria de assets (`gfx`/`audio`) continua funcional no overlay F1. +* Novos campos de Heap e Logs aparecem no log de performance do console. +* O custo de telemetria no `tick` deve cair drasticamente quando o overlay estiver desligado (verificável via profiling). + +## Tests / Validacao + +* **Teste Unitário:** Criar teste em `asset.rs` para garantir que contadores batem com a realidade após sequências de carga e cancelamento. +* **Teste de Regressão:** Garantir que o `certifier` continua detectando violações de bank no fim do frame. + +## Riscos + +* **Consistência Eventual:** Como os contadores são atômicos e não travam o sistema, pode haver uma defasagem momentânea durante um `commit` pesado; isto é aceitável conforme DEC-0005. diff --git a/test-cartridges/stress-console/program.pbx b/test-cartridges/stress-console/program.pbx index f2013a83..83b01e19 100644 Binary files a/test-cartridges/stress-console/program.pbx and b/test-cartridges/stress-console/program.pbx differ