dev/perf-vm-allocation-and-copy-pressure #18

Merged
bquarkz merged 6 commits from dev/perf-vm-allocation-and-copy-pressure into master 2026-04-20 10:09:28 +00:00
19 changed files with 286 additions and 100 deletions

View File

@ -6,4 +6,4 @@ license.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive", "rc"] }

View File

@ -21,4 +21,4 @@ pub use disassembler::disassemble;
pub use layout::{compute_function_layouts, FunctionLayout}; pub use layout::{compute_function_layouts, FunctionLayout};
pub use model::{BytecodeLoader, FunctionMeta, LoadError, SyscallDecl}; pub use model::{BytecodeLoader, FunctionMeta, LoadError, SyscallDecl};
pub use program_image::ProgramImage; pub use program_image::ProgramImage;
pub use value::{HeapRef, Value}; pub use value::{string_materialization_count, HeapRef, Value};

View File

@ -75,7 +75,7 @@ impl From<BytecodeModule> for ProgramImage {
ConstantPoolEntry::Int64(v) => Value::Int64(*v), ConstantPoolEntry::Int64(v) => Value::Int64(*v),
ConstantPoolEntry::Float64(v) => Value::Float(*v), ConstantPoolEntry::Float64(v) => Value::Float(*v),
ConstantPoolEntry::Boolean(v) => Value::Boolean(*v), ConstantPoolEntry::Boolean(v) => Value::Boolean(*v),
ConstantPoolEntry::String(v) => Value::String(v.clone()), ConstantPoolEntry::String(v) => Value::string(v.clone()),
ConstantPoolEntry::Int32(v) => Value::Int32(*v), ConstantPoolEntry::Int32(v) => Value::Int32(*v),
}) })
.collect(); .collect();
@ -99,7 +99,7 @@ impl From<ProgramImage> for BytecodeModule {
Value::Int64(v) => ConstantPoolEntry::Int64(*v), Value::Int64(v) => ConstantPoolEntry::Int64(*v),
Value::Float(v) => ConstantPoolEntry::Float64(*v), Value::Float(v) => ConstantPoolEntry::Float64(*v),
Value::Boolean(v) => ConstantPoolEntry::Boolean(*v), Value::Boolean(v) => ConstantPoolEntry::Boolean(*v),
Value::String(v) => ConstantPoolEntry::String(v.clone()), Value::String(v) => ConstantPoolEntry::String(v.to_string()),
Value::Int32(v) => ConstantPoolEntry::Int32(*v), Value::Int32(v) => ConstantPoolEntry::Int32(*v),
Value::HeapRef(_) => ConstantPoolEntry::Null, Value::HeapRef(_) => ConstantPoolEntry::Null,
}) })

View File

@ -1,5 +1,12 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::cell::Cell;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fmt::Write;
use std::sync::Arc;
// Per-thread running total of `Value::string` constructions. Every thread gets its own
// counter starting at zero, so a reader only sees materializations made on its own thread.
thread_local! {
static STRING_MATERIALIZATION_COUNT: Cell<u64> = const { Cell::new(0) };
}
/// Opaque handle that references an object stored in the VM heap. /// Opaque handle that references an object stored in the VM heap.
/// ///
@ -26,7 +33,7 @@ pub enum Value {
/// Boolean value (true/false). /// Boolean value (true/false).
Boolean(bool), Boolean(bool),
/// UTF-8 string. Strings are immutable and usually come from the Constant Pool. /// UTF-8 string. Strings are immutable and usually come from the Constant Pool.
String(String), String(Arc<str>),
/// A handle to an object on the heap (opaque reference). /// A handle to an object on the heap (opaque reference).
HeapRef(HeapRef), HeapRef(HeapRef),
/// Represents the absence of a value (equivalent to `null` or `undefined`). /// Represents the absence of a value (equivalent to `null` or `undefined`).
@ -74,6 +81,14 @@ impl PartialOrd for Value {
} }
impl Value { impl Value {
/// Builds a `Value::String` from anything convertible into `Arc<str>` and records the
/// construction in this thread's materialization counter.
///
/// The counter is bumped before the conversion runs, once per call.
pub fn string<S: Into<Arc<str>>>(value: S) -> Self {
    STRING_MATERIALIZATION_COUNT.with(|tally| {
        let seen_so_far = tally.get();
        tally.set(seen_so_far + 1);
    });
    let payload: Arc<str> = value.into();
    Value::String(payload)
}
pub fn as_float(&self) -> Option<f64> { pub fn as_float(&self) -> Option<f64> {
match self { match self {
Value::Int32(i) => Some(*i as f64), Value::Int32(i) => Some(*i as f64),
@ -92,18 +107,51 @@ impl Value {
} }
} }
/// Appends this value's textual form to `out` without building an intermediate `String`.
///
/// Strings are copied verbatim, `Null` renders as `null`, heap references render as
/// `[HeapRef <id>]`, and every other variant uses its `Display` output.
pub fn append_to_string(&self, out: &mut String) {
    // `fmt::Write` for `String` never reports an error, so the result is discarded.
    let _ = match self {
        Value::String(text) => {
            out.push_str(text);
            Ok(())
        }
        Value::Null => {
            out.push_str("null");
            Ok(())
        }
        Value::Boolean(flag) => write!(out, "{}", flag),
        Value::Int32(n) => write!(out, "{}", n),
        Value::Int64(n) => write!(out, "{}", n),
        Value::Float(x) => write!(out, "{}", x),
        Value::HeapRef(handle) => write!(out, "[HeapRef {}]", handle.0),
    };
}
/// Returns the number of bytes `append_to_string` emits for this value, for use as a
/// `String::with_capacity` argument.
///
/// The length is computed without touching the heap: numeric variants are formatted into a
/// byte-counting sink instead of a temporary `String`, so asking for the hint never costs
/// an allocation of its own. The heap-reference arm counts the literal `[HeapRef ]` frame
/// (10 bytes) plus the digits of the handle.
pub fn string_len_hint(&self) -> usize {
    /// `fmt::Write` sink that only tallies the bytes it is handed.
    struct ByteCount(usize);

    impl std::fmt::Write for ByteCount {
        fn write_str(&mut self, s: &str) -> std::fmt::Result {
            self.0 += s.len();
            Ok(())
        }
    }

    /// Length in bytes of `value`'s `Display` output.
    fn rendered_len(value: impl std::fmt::Display) -> usize {
        use std::fmt::Write as _;

        let mut sink = ByteCount(0);
        // The sink itself never fails; a failing `Display` impl would just yield a short hint.
        let _ = write!(sink, "{}", value);
        sink.0
    }

    match self {
        Value::String(s) => s.len(),
        Value::Null => "null".len(),
        Value::Boolean(true) => "true".len(),
        Value::Boolean(false) => "false".len(),
        Value::Int32(i) => rendered_len(i),
        Value::Int64(i) => rendered_len(i),
        Value::Float(f) => rendered_len(f),
        Value::HeapRef(r) => "[HeapRef ]".len() + rendered_len(&r.0),
    }
}
#[allow(clippy::inherent_to_string)] #[allow(clippy::inherent_to_string)]
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { let mut out = String::with_capacity(self.string_len_hint());
Value::Int32(i) => i.to_string(), self.append_to_string(&mut out);
Value::Int64(i) => i.to_string(), out
Value::Float(f) => f.to_string(),
Value::Boolean(b) => b.to_string(),
Value::String(s) => s.clone(),
Value::HeapRef(r) => format!("[HeapRef {}]", r.0),
Value::Null => "null".to_string(),
} }
} }
pub fn string_materialization_count() -> u64 {
STRING_MATERIALIZATION_COUNT.with(Cell::get)
} }
#[cfg(test)] #[cfg(test)]

View File

@ -22,6 +22,6 @@ impl<'a> HostReturn<'a> {
self.stack.push(Value::HeapRef(HeapRef(g as u32))); self.stack.push(Value::HeapRef(HeapRef(g as u32)));
} }
pub fn push_string(&mut self, s: String) { pub fn push_string(&mut self, s: String) {
self.stack.push(Value::String(s)); self.stack.push(Value::string(s));
} }
} }

View File

@ -24,6 +24,8 @@ pub struct TelemetryFrame {
// RAM (Heap) // RAM (Heap)
pub heap_used_bytes: usize, pub heap_used_bytes: usize,
pub heap_max_bytes: usize, pub heap_max_bytes: usize,
pub vm_heap_allocations: u64,
pub vm_string_materializations: u64,
// Log Pressure from the last completed logical frame // Log Pressure from the last completed logical frame
pub logs_count: u32, pub logs_count: u32,
@ -53,6 +55,8 @@ pub struct AtomicTelemetry {
// RAM (Heap) // RAM (Heap)
pub heap_used_bytes: AtomicUsize, pub heap_used_bytes: AtomicUsize,
pub heap_max_bytes: AtomicUsize, pub heap_max_bytes: AtomicUsize,
pub vm_heap_allocations: AtomicU64,
pub vm_string_materializations: AtomicU64,
// Transient in-flight log counter for the current logical frame // Transient in-flight log counter for the current logical frame
pub current_logs_count: Arc<AtomicU32>, pub current_logs_count: Arc<AtomicU32>,
@ -83,6 +87,8 @@ impl AtomicTelemetry {
scene_slots_total: self.scene_slots_total.load(Ordering::Relaxed), scene_slots_total: self.scene_slots_total.load(Ordering::Relaxed),
heap_used_bytes: self.heap_used_bytes.load(Ordering::Relaxed), heap_used_bytes: self.heap_used_bytes.load(Ordering::Relaxed),
heap_max_bytes: self.heap_max_bytes.load(Ordering::Relaxed), heap_max_bytes: self.heap_max_bytes.load(Ordering::Relaxed),
vm_heap_allocations: self.vm_heap_allocations.load(Ordering::Relaxed),
vm_string_materializations: self.vm_string_materializations.load(Ordering::Relaxed),
logs_count: self.logs_count.load(Ordering::Relaxed), logs_count: self.logs_count.load(Ordering::Relaxed),
vm_steps: self.vm_steps.load(Ordering::Relaxed), vm_steps: self.vm_steps.load(Ordering::Relaxed),
} }
@ -102,6 +108,8 @@ impl AtomicTelemetry {
self.scene_slots_used.store(0, Ordering::Relaxed); self.scene_slots_used.store(0, Ordering::Relaxed);
self.scene_slots_total.store(0, Ordering::Relaxed); self.scene_slots_total.store(0, Ordering::Relaxed);
self.heap_used_bytes.store(0, Ordering::Relaxed); self.heap_used_bytes.store(0, Ordering::Relaxed);
self.vm_heap_allocations.store(0, Ordering::Relaxed);
self.vm_string_materializations.store(0, Ordering::Relaxed);
self.vm_steps.store(0, Ordering::Relaxed); self.vm_steps.store(0, Ordering::Relaxed);
self.logs_count.store(0, Ordering::Relaxed); self.logs_count.store(0, Ordering::Relaxed);
self.current_logs_count.store(0, Ordering::Relaxed); self.current_logs_count.store(0, Ordering::Relaxed);
@ -313,4 +321,17 @@ mod tests {
assert_eq!(snapshot.logs_count, 3); assert_eq!(snapshot.logs_count, 3);
assert_eq!(current.load(Ordering::Relaxed), 7); assert_eq!(current.load(Ordering::Relaxed), 7);
} }
/// The two internal allocation counters must be carried into the snapshot frame.
#[test]
fn snapshot_includes_internal_allocation_evidence() {
    let telemetry = AtomicTelemetry::new(Arc::new(AtomicU32::new(0)));

    telemetry.vm_heap_allocations.store(2, Ordering::Relaxed);
    telemetry.vm_string_materializations.store(5, Ordering::Relaxed);

    let frame = telemetry.snapshot();
    assert_eq!(frame.vm_heap_allocations, 2);
    assert_eq!(frame.vm_string_materializations, 5);
}
} }

View File

@ -7,6 +7,7 @@ mod tick;
use crate::CrashReport; use crate::CrashReport;
use crate::fs::{FsState, VirtualFS}; use crate::fs::{FsState, VirtualFS};
use crate::services::memcard::MemcardService; use crate::services::memcard::MemcardService;
use prometeu_bytecode::string_materialization_count;
use prometeu_hal::cartridge::AppMode; use prometeu_hal::cartridge::AppMode;
use prometeu_hal::log::LogService; use prometeu_hal::log::LogService;
use prometeu_hal::telemetry::{AtomicTelemetry, CertificationConfig, Certifier}; use prometeu_hal::telemetry::{AtomicTelemetry, CertificationConfig, Certifier};
@ -38,6 +39,8 @@ pub struct VirtualMachineRuntime {
pub paused: bool, pub paused: bool,
pub debug_step_request: bool, pub debug_step_request: bool,
pub inspection_active: bool, pub inspection_active: bool,
pub(crate) frame_start_heap_allocations: u64,
pub(crate) frame_start_string_materializations: u64,
pub(crate) needs_prepare_entry_call: bool, pub(crate) needs_prepare_entry_call: bool,
pub(crate) boot_time: Instant, pub(crate) boot_time: Instant,
} }

View File

@ -577,7 +577,7 @@ impl NativeInterface for VirtualMachineRuntime {
fn expect_string(args: &[Value], index: usize, field: &str) -> Result<String, VmFault> { fn expect_string(args: &[Value], index: usize, field: &str) -> Result<String, VmFault> {
match args.get(index).ok_or_else(|| VmFault::Trap(TRAP_TYPE, format!("Missing {}", field)))? { match args.get(index).ok_or_else(|| VmFault::Trap(TRAP_TYPE, format!("Missing {}", field)))? {
Value::String(value) => Ok(value.clone()), Value::String(value) => Ok(value.to_string()),
_ => Err(VmFault::Trap(TRAP_TYPE, format!("Expected string {}", field))), _ => Err(VmFault::Trap(TRAP_TYPE, format!("Expected string {}", field))),
} }
} }

View File

@ -32,6 +32,8 @@ impl VirtualMachineRuntime {
paused: false, paused: false,
debug_step_request: false, debug_step_request: false,
inspection_active: false, inspection_active: false,
frame_start_heap_allocations: 0,
frame_start_string_materializations: 0,
needs_prepare_entry_call: false, needs_prepare_entry_call: false,
boot_time, boot_time,
}; };
@ -106,6 +108,8 @@ impl VirtualMachineRuntime {
self.paused = false; self.paused = false;
self.debug_step_request = false; self.debug_step_request = false;
self.inspection_active = false; self.inspection_active = false;
self.frame_start_heap_allocations = 0;
self.frame_start_string_materializations = 0;
self.needs_prepare_entry_call = false; self.needs_prepare_entry_call = false;
} }

View File

@ -493,6 +493,8 @@ fn reset_clears_cartridge_scoped_runtime_state() {
Some(CrashReport::VmPanic { message: "stale".into(), pc: Some(55) }); Some(CrashReport::VmPanic { message: "stale".into(), pc: Some(55) });
runtime.paused = true; runtime.paused = true;
runtime.debug_step_request = true; runtime.debug_step_request = true;
runtime.frame_start_heap_allocations = 11;
runtime.frame_start_string_materializations = 22;
runtime.needs_prepare_entry_call = true; runtime.needs_prepare_entry_call = true;
runtime.reset(&mut vm); runtime.reset(&mut vm);
@ -518,10 +520,56 @@ fn reset_clears_cartridge_scoped_runtime_state() {
assert!(runtime.last_crash_report.is_none()); assert!(runtime.last_crash_report.is_none());
assert!(!runtime.paused); assert!(!runtime.paused);
assert!(!runtime.debug_step_request); assert!(!runtime.debug_step_request);
assert_eq!(runtime.frame_start_heap_allocations, 0);
assert_eq!(runtime.frame_start_string_materializations, 0);
assert!(!runtime.needs_prepare_entry_call); assert!(!runtime.needs_prepare_entry_call);
assert_eq!(vm.pc(), 0); assert_eq!(vm.pc(), 0);
} }
/// One tick of an integer-only program (no constants, no strings in the bytecode) must
/// report zero heap allocations and zero string materializations in telemetry.
#[test]
fn tick_numeric_happy_path_records_zero_internal_allocations() {
    let mut runtime = VirtualMachineRuntime::new(None);
    let mut vm = VirtualMachine::default();
    let mut hardware = Hardware::new();
    let signals = InputSignals::default();

    let bytecode =
        assemble("PUSH_I32 1\nPUSH_I32 2\nADD\nPOP_N 1\nFRAME_SYNC\nHALT").expect("assemble");
    let cartridge = cartridge_with_program(
        serialized_single_function_module(bytecode, vec![]),
        caps::NONE,
    );
    runtime.initialize_vm(&mut vm, &cartridge).expect("runtime must initialize");

    // A `Some` report would mean the tick ended in a crash.
    let crash = runtime.tick(&mut vm, &signals, &mut hardware);
    assert!(crash.is_none());

    let frame = runtime.atomic_telemetry.snapshot();
    assert_eq!(frame.vm_heap_allocations, 0);
    assert_eq!(frame.vm_string_materializations, 0);
}
/// A tick that only moves a constant-pool string through global slot 0 must report zero
/// heap allocations and zero string materializations for the frame.
#[test]
fn tick_already_materialized_string_path_records_zero_internal_allocations() {
    let mut runtime = VirtualMachineRuntime::new(None);
    let mut vm = VirtualMachine::default();
    let mut hardware = Hardware::new();
    let signals = InputSignals::default();

    let bytecode = assemble("PUSH_CONST 0\nSET_GLOBAL 0\nGET_GLOBAL 0\nPOP_N 1\nFRAME_SYNC\nHALT")
        .expect("assemble");
    // The single pool entry is the string the program pushes and stores.
    let constant_pool = vec![ConstantPoolEntry::String("steady".into())];
    let cartridge = cartridge_with_program(
        serialized_single_function_module_with_consts(bytecode, constant_pool, vec![]),
        caps::NONE,
    );
    runtime.initialize_vm(&mut vm, &cartridge).expect("runtime must initialize");

    let crash = runtime.tick(&mut vm, &signals, &mut hardware);
    assert!(crash.is_none());

    let frame = runtime.atomic_telemetry.snapshot();
    assert_eq!(frame.vm_heap_allocations, 0);
    assert_eq!(frame.vm_string_materializations, 0);
}
#[test] #[test]
fn initialize_vm_failure_clears_previous_identity_and_handles() { fn initialize_vm_failure_clears_previous_identity_and_handles() {
let mut runtime = VirtualMachineRuntime::new(None); let mut runtime = VirtualMachineRuntime::new(None);

View File

@ -7,6 +7,18 @@ use prometeu_vm::LogicalFrameEndingReason;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
impl VirtualMachineRuntime { impl VirtualMachineRuntime {
/// Publishes how many heap allocations and string materializations have happened since
/// the baselines stored in `frame_start_*`.
///
/// Both deltas use `saturating_sub`, so a counter that reads below its baseline is
/// published as 0 instead of wrapping.
fn refresh_internal_allocation_telemetry(&self, vm: &VirtualMachine) {
    let heap_now = vm.heap().allocation_count();
    let heap_delta = heap_now.saturating_sub(self.frame_start_heap_allocations);

    let strings_now = string_materialization_count();
    let string_delta = strings_now.saturating_sub(self.frame_start_string_materializations);

    let telemetry = &self.atomic_telemetry;
    telemetry.vm_heap_allocations.store(heap_delta, Ordering::Relaxed);
    telemetry.vm_string_materializations.store(string_delta, Ordering::Relaxed);
}
fn bank_telemetry_summary( fn bank_telemetry_summary(
hw: &dyn HardwareBridge, hw: &dyn HardwareBridge,
) -> (BankTelemetry, BankTelemetry, BankTelemetry) { ) -> (BankTelemetry, BankTelemetry, BankTelemetry) {
@ -94,6 +106,10 @@ impl VirtualMachineRuntime {
self.atomic_telemetry.cycles_used.store(0, Ordering::Relaxed); self.atomic_telemetry.cycles_used.store(0, Ordering::Relaxed);
self.atomic_telemetry.syscalls.store(0, Ordering::Relaxed); self.atomic_telemetry.syscalls.store(0, Ordering::Relaxed);
self.atomic_telemetry.vm_steps.store(0, Ordering::Relaxed); self.atomic_telemetry.vm_steps.store(0, Ordering::Relaxed);
self.atomic_telemetry.vm_heap_allocations.store(0, Ordering::Relaxed);
self.atomic_telemetry.vm_string_materializations.store(0, Ordering::Relaxed);
self.frame_start_heap_allocations = vm.heap().allocation_count();
self.frame_start_string_materializations = string_materialization_count();
} }
let budget = std::cmp::min(Self::SLICE_PER_TICK, self.logical_frame_remaining_cycles); let budget = std::cmp::min(Self::SLICE_PER_TICK, self.logical_frame_remaining_cycles);
@ -177,6 +193,7 @@ impl VirtualMachineRuntime {
self.atomic_telemetry self.atomic_telemetry
.heap_used_bytes .heap_used_bytes
.store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed); .store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed);
self.refresh_internal_allocation_telemetry(vm);
self.atomic_telemetry self.atomic_telemetry
.host_cpu_time_us .host_cpu_time_us
.store(start.elapsed().as_micros() as u64, Ordering::Relaxed); .store(start.elapsed().as_micros() as u64, Ordering::Relaxed);
@ -253,6 +270,7 @@ impl VirtualMachineRuntime {
self.atomic_telemetry self.atomic_telemetry
.heap_used_bytes .heap_used_bytes
.store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed); .store(vm.heap().used_bytes.load(Ordering::Relaxed), Ordering::Relaxed);
self.refresh_internal_allocation_telemetry(vm);
self.atomic_telemetry.frame_index.store(self.logical_frame_index, Ordering::Relaxed); self.atomic_telemetry.frame_index.store(self.logical_frame_index, Ordering::Relaxed);
self.atomic_telemetry self.atomic_telemetry

View File

@ -3,7 +3,7 @@ use crate::object::{ObjectHeader, ObjectKind};
use prometeu_bytecode::{HeapRef, Value}; use prometeu_bytecode::{HeapRef, Value};
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
/// Internal stored object: header plus opaque payload bytes. /// Internal stored object: header plus opaque payload bytes.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -79,6 +79,8 @@ pub struct Heap {
/// Total bytes currently used by all objects in the heap. /// Total bytes currently used by all objects in the heap.
pub used_bytes: Arc<AtomicUsize>, pub used_bytes: Arc<AtomicUsize>,
/// Monotonic count of heap allocation events for internal engineering telemetry.
pub allocation_count: Arc<AtomicU64>,
} }
impl Heap { impl Heap {
@ -87,11 +89,13 @@ impl Heap {
objects: Vec::new(), objects: Vec::new(),
free_list: Vec::new(), free_list: Vec::new(),
used_bytes: Arc::new(AtomicUsize::new(0)), used_bytes: Arc::new(AtomicUsize::new(0)),
allocation_count: Arc::new(AtomicU64::new(0)),
} }
} }
fn insert_object(&mut self, obj: StoredObject) -> HeapRef { fn insert_object(&mut self, obj: StoredObject) -> HeapRef {
self.used_bytes.fetch_add(obj.bytes(), Ordering::Relaxed); self.used_bytes.fetch_add(obj.bytes(), Ordering::Relaxed);
self.allocation_count.fetch_add(1, Ordering::Relaxed);
if let Some(idx) = self.free_list.pop() { if let Some(idx) = self.free_list.pop() {
debug_assert!(self.objects.get(idx).is_some_and(|slot| slot.is_none())); debug_assert!(self.objects.get(idx).is_some_and(|slot| slot.is_none()));
self.objects[idx] = Some(obj); self.objects[idx] = Some(obj);
@ -411,6 +415,10 @@ impl Heap {
self.objects.iter().filter(|s| s.is_some()).count() self.objects.iter().filter(|s| s.is_some()).count()
} }
/// Returns the current value of the heap's allocation-event counter.
///
/// The counter is incremented once per `insert_object` call, so it counts allocation
/// events rather than live objects. The read is a relaxed atomic load.
pub fn allocation_count(&self) -> u64 {
self.allocation_count.load(Ordering::Relaxed)
}
/// Enumerate handles of coroutines that are currently suspended (i.e., not running): /// Enumerate handles of coroutines that are currently suspended (i.e., not running):
/// Ready or Sleeping. These must be treated as GC roots by the runtime so their /// Ready or Sleeping. These must be treated as GC roots by the runtime so their
/// stacks/frames are scanned during mark. /// stacks/frames are scanned during mark.

View File

@ -639,7 +639,10 @@ impl VirtualMachine {
} }
OpCode::Add => self.binary_op(opcode, start_pc as u32, |a, b| match (&a, &b) { OpCode::Add => self.binary_op(opcode, start_pc as u32, |a, b| match (&a, &b) {
(Value::String(_), _) | (_, Value::String(_)) => { (Value::String(_), _) | (_, Value::String(_)) => {
Ok(Value::String(format!("{}{}", a.to_string(), b.to_string()))) let mut out = String::with_capacity(a.string_len_hint() + b.string_len_hint());
a.append_to_string(&mut out);
b.append_to_string(&mut out);
Ok(Value::string(out))
} }
(Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_add(*b))), (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_add(*b))),
(Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_add(*b))), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_add(*b))),
@ -1213,6 +1216,7 @@ impl VirtualMachine {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use std::sync::Arc;
fn new_test_vm(rom: Vec<u8>, constant_pool: Vec<Value>) -> VirtualMachine { fn new_test_vm(rom: Vec<u8>, constant_pool: Vec<Value>) -> VirtualMachine {
let rom_len = rom.len() as u32; let rom_len = rom.len() as u32;
@ -1642,6 +1646,19 @@ mod tests {
assert_eq!(vm.peek().unwrap(), &Value::String("hello".into())); assert_eq!(vm.peek().unwrap(), &Value::String("hello".into()));
} }
/// Cloning a string value held in `globals` must hand back the same `Arc<str>` allocation
/// rather than a fresh copy of the bytes.
#[test]
fn test_shared_string_clone_reuses_materialized_payload() {
    let mut vm = VirtualMachine::default();
    let original: Arc<str> = "shared-global".into();
    vm.globals.push(Value::String(original.clone()));

    let copy = vm.globals[0].clone();
    if let Value::String(copied) = &copy {
        // Pointer equality proves the payload was shared, not duplicated.
        assert!(Arc::ptr_eq(&original, copied));
    } else {
        panic!("expected string clone from globals, got {:?}", copy);
    }
}
#[test] #[test]
fn test_push_const_invalid_index_traps_oob() { fn test_push_const_invalid_index_traps_oob() {
let mut rom = Vec::new(); let mut rom = Vec::new();

View File

@ -1,4 +1,4 @@
{"type":"meta","next_id":{"DSC":29,"AGD":29,"DEC":18,"PLN":33,"LSN":35,"CLSN":1}} {"type":"meta","next_id":{"DSC":29,"AGD":29,"DEC":19,"PLN":36,"LSN":36,"CLSN":1}}
{"type":"discussion","id":"DSC-0023","status":"done","ticket":"perf-full-migration-to-atomic-telemetry","title":"Agenda - [PERF] Full Migration to Atomic Telemetry","created_at":"2026-04-10","updated_at":"2026-04-10","tags":["perf","runtime","telemetry"],"agendas":[{"id":"AGD-0021","file":"workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0008","file":"workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0007","file":"workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0028","file":"lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} {"type":"discussion","id":"DSC-0023","status":"done","ticket":"perf-full-migration-to-atomic-telemetry","title":"Agenda - [PERF] Full Migration to Atomic Telemetry","created_at":"2026-04-10","updated_at":"2026-04-10","tags":["perf","runtime","telemetry"],"agendas":[{"id":"AGD-0021","file":"workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0008","file":"workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0007","file":"workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0028","file":"lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]}
{"type":"discussion","id":"DSC-0020","status":"done","ticket":"jenkins-gitea-integration","title":"Jenkins Gitea Integration and Relocation","created_at":"2026-04-07","updated_at":"2026-04-07","tags":["ci","jenkins","gitea"],"agendas":[{"id":"AGD-0018","file":"workflow/agendas/AGD-0018-jenkins-gitea-integration-and-relocation.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"decisions":[{"id":"DEC-0003","file":"workflow/decisions/DEC-0003-jenkins-gitea-strategy.md","status":"accepted","created_at":"2026-04-07","updated_at":"2026-04-07"}],"plans":[{"id":"PLN-0003","file":"workflow/plans/PLN-0003-jenkins-gitea-execution.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"lessons":[{"id":"LSN-0021","file":"lessons/DSC-0020-jenkins-gitea-integration/LSN-0021-jenkins-gitea-integration.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}]} {"type":"discussion","id":"DSC-0020","status":"done","ticket":"jenkins-gitea-integration","title":"Jenkins Gitea Integration and Relocation","created_at":"2026-04-07","updated_at":"2026-04-07","tags":["ci","jenkins","gitea"],"agendas":[{"id":"AGD-0018","file":"workflow/agendas/AGD-0018-jenkins-gitea-integration-and-relocation.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"decisions":[{"id":"DEC-0003","file":"workflow/decisions/DEC-0003-jenkins-gitea-strategy.md","status":"accepted","created_at":"2026-04-07","updated_at":"2026-04-07"}],"plans":[{"id":"PLN-0003","file":"workflow/plans/PLN-0003-jenkins-gitea-execution.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"lessons":[{"id":"LSN-0021","file":"lessons/DSC-0020-jenkins-gitea-integration/LSN-0021-jenkins-gitea-integration.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}]}
{"type":"discussion","id":"DSC-0021","status":"done","ticket":"asset-entry-codec-enum-with-metadata","title":"Asset Entry Codec Enum Contract","created_at":"2026-04-09","updated_at":"2026-04-09","tags":["asset","runtime","codec","metadata"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0024","file":"lessons/DSC-0021-asset-entry-codec-enum-contract/LSN-0024-string-on-the-wire-enum-in-runtime.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} {"type":"discussion","id":"DSC-0021","status":"done","ticket":"asset-entry-codec-enum-with-metadata","title":"Asset Entry Codec Enum Contract","created_at":"2026-04-09","updated_at":"2026-04-09","tags":["asset","runtime","codec","metadata"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0024","file":"lessons/DSC-0021-asset-entry-codec-enum-contract/LSN-0024-string-on-the-wire-enum-in-runtime.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]}
@ -21,7 +21,7 @@
{"type":"discussion","id":"DSC-0026","status":"done","ticket":"render-all-scene-cache-and-camera-integration","title":"Integrate render_all with Scene Cache and Camera","created_at":"2026-04-14","updated_at":"2026-04-18","tags":["gfx","runtime","render","camera","scene"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0031","file":"lessons/DSC-0026-render-all-scene-cache-and-camera-integration/LSN-0031-frame-composition-belongs-above-the-render-backend.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} {"type":"discussion","id":"DSC-0026","status":"done","ticket":"render-all-scene-cache-and-camera-integration","title":"Integrate render_all with Scene Cache and Camera","created_at":"2026-04-14","updated_at":"2026-04-18","tags":["gfx","runtime","render","camera","scene"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0031","file":"lessons/DSC-0026-render-all-scene-cache-and-camera-integration/LSN-0031-frame-composition-belongs-above-the-render-backend.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]}
{"type":"discussion","id":"DSC-0027","status":"done","ticket":"frame-composer-public-syscall-surface","title":"Agenda - FrameComposer Public Syscall Surface","created_at":"2026-04-17","updated_at":"2026-04-18","tags":["gfx","runtime","syscall","abi","frame-composer","scene","camera","sprites"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0032","file":"lessons/DSC-0027-frame-composer-public-syscall-surface/LSN-0032-public-abi-must-follow-the-canonical-service-boundary.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} {"type":"discussion","id":"DSC-0027","status":"done","ticket":"frame-composer-public-syscall-surface","title":"Agenda - FrameComposer Public Syscall Surface","created_at":"2026-04-17","updated_at":"2026-04-18","tags":["gfx","runtime","syscall","abi","frame-composer","scene","camera","sprites"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0032","file":"lessons/DSC-0027-frame-composer-public-syscall-surface/LSN-0032-public-abi-must-follow-the-canonical-service-boundary.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]}
{"type":"discussion","id":"DSC-0028","status":"done","ticket":"deferred-overlay-and-primitive-composition","title":"Deferred Overlay and Primitive Composition over FrameComposer","created_at":"2026-04-18","updated_at":"2026-04-18","tags":["gfx","runtime","render","frame-composer","overlay","primitives","hud"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0033","file":"lessons/DSC-0028-deferred-overlay-and-primitive-composition/LSN-0033-debug-primitives-should-be-a-final-overlay-not-part-of-game-composition.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} {"type":"discussion","id":"DSC-0028","status":"done","ticket":"deferred-overlay-and-primitive-composition","title":"Deferred Overlay and Primitive Composition over FrameComposer","created_at":"2026-04-18","updated_at":"2026-04-18","tags":["gfx","runtime","render","frame-composer","overlay","primitives","hud"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0033","file":"lessons/DSC-0028-deferred-overlay-and-primitive-composition/LSN-0033-debug-primitives-should-be-a-final-overlay-not-part-of-game-composition.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]}
{"type":"discussion","id":"DSC-0014","status":"open","ticket":"perf-vm-allocation-and-copy-pressure","title":"Agenda - [PERF] VM Allocation and Copy Pressure","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0013","file":"workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0014","status":"done","ticket":"perf-vm-allocation-and-copy-pressure","title":"Agenda - [PERF] VM Allocation and Copy Pressure","created_at":"2026-03-27","updated_at":"2026-04-20","tags":[],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0035","file":"lessons/DSC-0014-perf-vm-allocation-and-copy-pressure/LSN-0035-first-materialization-is-not-the-same-as-hot-path-copy-pressure.md","status":"done","created_at":"2026-04-20","updated_at":"2026-04-20"}]}
{"type":"discussion","id":"DSC-0015","status":"open","ticket":"perf-cartridge-boot-and-program-ownership","title":"Agenda - [PERF] Cartridge Boot and Program Ownership","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0014","file":"workflow/agendas/AGD-0014-perf-cartridge-boot-and-program-ownership.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0015","status":"open","ticket":"perf-cartridge-boot-and-program-ownership","title":"Agenda - [PERF] Cartridge Boot and Program Ownership","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0014","file":"workflow/agendas/AGD-0014-perf-cartridge-boot-and-program-ownership.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]}
{"type":"discussion","id":"DSC-0016","status":"done","ticket":"tilemap-empty-cell-vs-tile-id-zero","title":"Tilemap Empty Cell vs Tile ID Zero","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0015","file":"workflow/agendas/AGD-0015-tilemap-empty-cell-vs-tile-id-zero.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0022","file":"lessons/DSC-0016-tilemap-empty-cell-semantics/LSN-0022-tilemap-empty-cell-convergence.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} {"type":"discussion","id":"DSC-0016","status":"done","ticket":"tilemap-empty-cell-vs-tile-id-zero","title":"Tilemap Empty Cell vs Tile ID Zero","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0015","file":"workflow/agendas/AGD-0015-tilemap-empty-cell-vs-tile-id-zero.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0022","file":"lessons/DSC-0016-tilemap-empty-cell-semantics/LSN-0022-tilemap-empty-cell-convergence.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]}
{"type":"discussion","id":"DSC-0017","status":"done","ticket":"asset-entry-metadata-normalization-contract","title":"Asset Entry Metadata Normalization Contract","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0016","file":"workflow/agendas/AGD-0016-asset-entry-metadata-normalization-contract.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[{"id":"DEC-0004","file":"workflow/decisions/DEC-0004-asset-entry-metadata-normalization-contract.md","status":"accepted","created_at":"2026-04-09","updated_at":"2026-04-09"}],"plans":[],"lessons":[{"id":"LSN-0023","file":"lessons/DSC-0017-asset-metadata-normalization/LSN-0023-typed-asset-metadata-helpers.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} {"type":"discussion","id":"DSC-0017","status":"done","ticket":"asset-entry-metadata-normalization-contract","title":"Asset Entry Metadata Normalization Contract","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0016","file":"workflow/agendas/AGD-0016-asset-entry-metadata-normalization-contract.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[{"id":"DEC-0004","file":"workflow/decisions/DEC-0004-asset-entry-metadata-normalization-contract.md","status":"accepted","created_at":"2026-04-09","updated_at":"2026-04-09"}],"plans":[],"lessons":[{"id":"LSN-0023","file":"lessons/DSC-0017-asset-metadata-normalization/LSN-0023-typed-asset-metadata-helpers.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]}

View File

@ -0,0 +1,69 @@
---
id: LSN-0035
ticket: perf-vm-allocation-and-copy-pressure
title: First Materialization Is Not the Same as Hot-Path Copy Pressure
created: 2026-04-20
tags: [runtime, vm, performance, strings, allocation, telemetry]
---
## Context
`DSC-0014` started from a broad performance complaint around VM allocation and string-heavy execution. The tempting response would have been to chase "zero alloc everywhere" or to reopen public VM semantics around globals and strings.
The completed work converged on a narrower and more durable rule: the real optimization boundary is not "all allocation is bad", but "first materialization cost must be separated from repeated hot-path copy pressure".
That distinction shaped three concrete outcomes:
- immutable string payloads are now shared instead of recopied in common paths such as constant-pool use and `GET_GLOBAL`;
- internal telemetry records heap-allocation and string-materialization evidence without turning those counters into certification policy;
- the published specs now explain that zero-allocation happy paths are engineering targets, not guest-visible compatibility promises.
## Key Decisions
### Allocation Baseline for Strings and Globals (`DEC-0018`)
**What:**
PROMETEU keeps strings in the public VM language surface, but the runtime treats string payload as potentially expensive. Hardcoded strings may materialize once during build/load, runtime-created strings may materialize when a new value is semantically created, and `GET_GLOBAL` keeps its public behavior while avoiding unnecessary repeated payload copying internally.
**Why:**
The original hotspot was not the existence of strings. It was the fact that already-materialized payloads were being cloned repeatedly on hot paths. Fixing that internally is cheaper and safer than redefining the guest ABI.
**Trade-offs:**
String concatenation still creates a new string when the language semantics require it. The runtime gives up the illusion of "free strings", but preserves the correct public model while removing redundant copies that were never semantically required.
## Patterns and Algorithms
- Separate semantic allocation from accidental copying:
A new value may require materialization. Re-reading or forwarding an existing value usually should not.
- Share immutable payloads aggressively:
If a payload is immutable and already materialized, shared ownership is often the cheapest way to preserve public semantics while removing internal copy pressure.
- Keep public semantics stable, move optimization inward:
`GET_GLOBAL` did not need new meaning. The fix belonged in representation and ownership, not in the opcode contract.
- Keep engineering evidence internal unless explicitly promoted:
The runtime now records internal allocation evidence, but that evidence is not automatically a certification rule or ABI promise.
- Make internal counters robust under parallel tests:
Process-global counters can produce false failures in concurrent test runs. Internal evidence for runtime behavior should be scoped so unrelated work cannot contaminate the result.
## Pitfalls
- Do not treat every allocation as equivalent.
A first materialization that creates a semantically new value is not the same problem as cloning an old value on every hot-path access.
- Do not promote engineering goals to public contracts by accident.
"Zero alloc on the happy path" is useful for implementation discipline, but dangerous if readers start assuming it is a certification guarantee.
- Do not optimize strings by changing surface semantics prematurely.
The pressure here was internal. Reopening the guest ABI first would have increased scope and risk without solving the actual hotspot cleanly.
- Do not use shared global counters for per-test behavioral evidence.
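The coverage run that failed during this work showed that instrumentation design matters as much as the metric itself: a counter shared across concurrently running tests can report failures that have nothing to do with the behavior under test.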
## Takeaways
- The right performance question is often "where is the repeated cost?" rather than "where is any cost at all?"
- Immutable payload sharing is a strong default when public semantics must remain stable and hot-path copies are the real issue.
- Internal telemetry should help engineering decisions without silently becoming a product contract.

View File

@ -1,80 +0,0 @@
---
id: AGD-0013
ticket: perf-vm-allocation-and-copy-pressure
title: Agenda - [PERF] VM Allocation and Copy Pressure
status: open
created: 2026-03-27
resolved:
decision:
tags: []
---
# Agenda - [PERF] VM Allocation and Copy Pressure
## Problema
O core da VM ainda aloca e clona demais em alguns caminhos relevantes, especialmente quando strings entram no fluxo.
Hoje `ADD` com string usa `format!`/`to_string()`, `GET_GLOBAL` clona valores e varios caminhos de erro montam strings dinamicas.
## Dor
- churn de heap reduz o teto de throughput da VM.
- carts que abusam de string e estado global pagam custo cedo demais.
- hardware barato sente alocacao repetitiva de forma desproporcional.
## Hotspots Atuais
- [virtual_machine.rs](/Users/niltonconstantino/personal/workspace.personal/intrepid/prometeu/runtime/crates/console/prometeu-vm/src/virtual_machine.rs#L635)
- [virtual_machine.rs](/Users/niltonconstantino/personal/workspace.personal/intrepid/prometeu/runtime/crates/console/prometeu-vm/src/virtual_machine.rs#L870)
- [tick.rs](/Users/niltonconstantino/personal/workspace.personal/intrepid/prometeu/runtime/crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs#L111)
## Alvo da Discussao
Definir o nivel de disciplina de alocacao/copia exigido do core da VM no baseline do console.
## O Que Precisa Ser Definido
1. Prioridade dos casos.
Fechar quais caminhos sao realmente hot:
- opcodes de string;
- acesso a globals;
- faults;
- logs.
2. Estrategia de ownership.
Decidir onde vale introduzir:
- borrow temporario;
- small-string strategy;
- copy-on-write;
- intern/cache de strings.
3. Meta de alocacao.
Definir se o projeto quer:
- zero alloc no frame loop feliz;
- alloc rara e explicita;
- apenas reducao oportunista.
4. Instrumentacao.
Decidir como medir alocacao sem transformar a VM em microbenchmark artificial.
## Open Questions de Arquitetura
1. Strings sao citizen de primeira classe no fantasy console ou recurso conveniente mas caro?
2. Vale endurecer a linguagem/ABI para reduzir alocacao implicitamente?
3. Caminhos de fault precisam ser maximizados para desempenho ou apenas os caminhos felizes?
## Dependencias
- `../specs/02a-vm-values-and-calling-convention.md`
- `../specs/03-memory-stack-heap-and-allocation.md`
- `../specs/10-debug-inspection-and-profiling.md`
## Criterio de Saida Desta Agenda
Pode virar PR quando houver decisao escrita sobre:
- caminho quente prioritario para desengordurar;
- meta minima de alocacao/copia da VM;
- estrategia de ownership para strings/values;
- instrumentacao canonica para medir regressao.

View File

@ -17,6 +17,17 @@ All runtime values are stored in VM slots as `Value`.
| `bool` | Boolean value |
| `float` | 64-bit floating point |
### String values
PROMETEU supports string values in the VM language surface.
String rules:
- hardcoded strings may be materialized once during build/load through constant-pool-backed storage;
- runtime-created strings may materialize at the point where a new string value is semantically created;
- the public value model does not require repeated payload copies after first materialization;
- reducing repeated string copy pressure is an internal runtime concern and MUST NOT be interpreted as a guest-visible ABI distinction.
### Built-in vector and graphics types
These are treated as VM values with stable layout semantics.

View File

@ -27,6 +27,7 @@ The stack stores transient execution values and frame-scoped data.
The stack may contain:
- primitive values;
- string values;
- built-in fixed-layout values;
- heap handles;
- tuple-shaped multi-return values.
@ -155,6 +156,15 @@ Heap allocation conceptually performs:
2. create or reuse gate entry;
3. return validated handle.
First materialization cost is distinct from repeated hot-path copy pressure.
Implications:
- constant-pool-backed values may pay materialization cost before execution begins;
- runtime-created values may pay materialization cost when a new value is semantically produced;
- the runtime SHOULD minimize repeated implicit copying on hot paths after that first materialization;
- zero-allocation happy-path work is an engineering optimization target, not a published compatibility guarantee by itself.
If allocation fails:
- the VM may trigger GC;

View File

@ -188,6 +188,15 @@ Limit:32KB
These data feed host-owned certification.
The profiling boundary MUST distinguish public certification input from internal engineering evidence.
In particular:
- host-owned certification may consume stable runtime counters such as heap usage and GC-related pressure;
- internal counters for allocation events or string materialization may exist to guide implementation work;
- such internal counters MUST NOT be treated as a guest-visible certification promise unless a separate decision explicitly promotes them;
- zero-allocation happy-path goals remain internal engineering targets unless promoted to a public certification contract.
### 7.1 Bank Occupancy Profiling
Bank occupancy diagnostics are slot-first.