From 8c5957a0a9ebc9766f9b48960b28fe3f1818b7a2 Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Mon, 20 Apr 2026 08:41:08 +0100 Subject: [PATCH] [PERF] VM Allocation and Copy Pressure --- discussion/index.ndjson | 4 +- ...13-perf-vm-allocation-and-copy-pressure.md | 55 ++++++++- ...m-allocation-and-copy-pressure-baseline.md | 112 ++++++++++++++++++ 3 files changed, 163 insertions(+), 8 deletions(-) create mode 100644 discussion/workflow/decisions/DEC-0018-vm-allocation-and-copy-pressure-baseline.md diff --git a/discussion/index.ndjson b/discussion/index.ndjson index d86c5efc..731a7de7 100644 --- a/discussion/index.ndjson +++ b/discussion/index.ndjson @@ -1,4 +1,4 @@ -{"type":"meta","next_id":{"DSC":29,"AGD":29,"DEC":18,"PLN":33,"LSN":35,"CLSN":1}} +{"type":"meta","next_id":{"DSC":29,"AGD":29,"DEC":19,"PLN":36,"LSN":35,"CLSN":1}} {"type":"discussion","id":"DSC-0023","status":"done","ticket":"perf-full-migration-to-atomic-telemetry","title":"Agenda - [PERF] Full Migration to Atomic Telemetry","created_at":"2026-04-10","updated_at":"2026-04-10","tags":["perf","runtime","telemetry"],"agendas":[{"id":"AGD-0021","file":"workflow/agendas/AGD-0021-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"decisions":[{"id":"DEC-0008","file":"workflow/decisions/DEC-0008-full-migration-to-atomic-telemetry.md","status":"accepted","created_at":"2026-04-10","updated_at":"2026-04-10"}],"plans":[{"id":"PLN-0007","file":"workflow/plans/PLN-0007-full-migration-to-atomic-telemetry.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}],"lessons":[{"id":"LSN-0028","file":"lessons/DSC-0023-perf-full-migration-to-atomic-telemetry/LSN-0028-converging-to-single-atomic-telemetry-source.md","status":"done","created_at":"2026-04-10","updated_at":"2026-04-10"}]} {"type":"discussion","id":"DSC-0020","status":"done","ticket":"jenkins-gitea-integration","title":"Jenkins Gitea Integration and 
Relocation","created_at":"2026-04-07","updated_at":"2026-04-07","tags":["ci","jenkins","gitea"],"agendas":[{"id":"AGD-0018","file":"workflow/agendas/AGD-0018-jenkins-gitea-integration-and-relocation.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"decisions":[{"id":"DEC-0003","file":"workflow/decisions/DEC-0003-jenkins-gitea-strategy.md","status":"accepted","created_at":"2026-04-07","updated_at":"2026-04-07"}],"plans":[{"id":"PLN-0003","file":"workflow/plans/PLN-0003-jenkins-gitea-execution.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}],"lessons":[{"id":"LSN-0021","file":"lessons/DSC-0020-jenkins-gitea-integration/LSN-0021-jenkins-gitea-integration.md","status":"done","created_at":"2026-04-07","updated_at":"2026-04-07"}]} {"type":"discussion","id":"DSC-0021","status":"done","ticket":"asset-entry-codec-enum-with-metadata","title":"Asset Entry Codec Enum Contract","created_at":"2026-04-09","updated_at":"2026-04-09","tags":["asset","runtime","codec","metadata"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0024","file":"lessons/DSC-0021-asset-entry-codec-enum-contract/LSN-0024-string-on-the-wire-enum-in-runtime.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} @@ -21,7 +21,7 @@ {"type":"discussion","id":"DSC-0026","status":"done","ticket":"render-all-scene-cache-and-camera-integration","title":"Integrate render_all with Scene Cache and Camera","created_at":"2026-04-14","updated_at":"2026-04-18","tags":["gfx","runtime","render","camera","scene"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0031","file":"lessons/DSC-0026-render-all-scene-cache-and-camera-integration/LSN-0031-frame-composition-belongs-above-the-render-backend.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} {"type":"discussion","id":"DSC-0027","status":"done","ticket":"frame-composer-public-syscall-surface","title":"Agenda - FrameComposer Public Syscall 
Surface","created_at":"2026-04-17","updated_at":"2026-04-18","tags":["gfx","runtime","syscall","abi","frame-composer","scene","camera","sprites"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0032","file":"lessons/DSC-0027-frame-composer-public-syscall-surface/LSN-0032-public-abi-must-follow-the-canonical-service-boundary.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} {"type":"discussion","id":"DSC-0028","status":"done","ticket":"deferred-overlay-and-primitive-composition","title":"Deferred Overlay and Primitive Composition over FrameComposer","created_at":"2026-04-18","updated_at":"2026-04-18","tags":["gfx","runtime","render","frame-composer","overlay","primitives","hud"],"agendas":[],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0033","file":"lessons/DSC-0028-deferred-overlay-and-primitive-composition/LSN-0033-debug-primitives-should-be-a-final-overlay-not-part-of-game-composition.md","status":"done","created_at":"2026-04-18","updated_at":"2026-04-18"}]} -{"type":"discussion","id":"DSC-0014","status":"open","ticket":"perf-vm-allocation-and-copy-pressure","title":"Agenda - [PERF] VM Allocation and Copy Pressure","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0013","file":"workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} +{"type":"discussion","id":"DSC-0014","status":"review","ticket":"perf-vm-allocation-and-copy-pressure","title":"Agenda - [PERF] VM Allocation and Copy Pressure","created_at":"2026-03-27","updated_at":"2026-04-20","tags":[],"agendas":[{"id":"AGD-0013","file":"workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md","status":"accepted","created_at":"2026-03-27","updated_at":"2026-04-20","_override_reason":"User explicitly requested emitting a decision from the resolved agenda in this 
turn."}],"decisions":[{"id":"DEC-0018","file":"workflow/decisions/DEC-0018-vm-allocation-and-copy-pressure-baseline.md","status":"in_progress","created_at":"2026-04-20","updated_at":"2026-04-20","ref_agenda":"AGD-0013","_override_reason":"User explicitly requested emitting and then accepting the decision, followed by plan generation."}],"plans":[{"id":"PLN-0033","file":"workflow/plans/PLN-0033-vm-hot-path-ownership-and-string-copy-pressure.md","status":"review","created_at":"2026-04-20","updated_at":"2026-04-20","ref_decisions":["DEC-0018"]},{"id":"PLN-0034","file":"workflow/plans/PLN-0034-internal-allocation-evidence-and-hot-path-measurement.md","status":"review","created_at":"2026-04-20","updated_at":"2026-04-20","ref_decisions":["DEC-0018"]},{"id":"PLN-0035","file":"workflow/plans/PLN-0035-runtime-spec-wording-for-materialization-vs-copy-pressure.md","status":"review","created_at":"2026-04-20","updated_at":"2026-04-20","ref_decisions":["DEC-0018"]}],"lessons":[]} {"type":"discussion","id":"DSC-0015","status":"open","ticket":"perf-cartridge-boot-and-program-ownership","title":"Agenda - [PERF] Cartridge Boot and Program Ownership","created_at":"2026-03-27","updated_at":"2026-03-27","tags":[],"agendas":[{"id":"AGD-0014","file":"workflow/agendas/AGD-0014-perf-cartridge-boot-and-program-ownership.md","status":"open","created_at":"2026-03-27","updated_at":"2026-03-27"}],"decisions":[],"plans":[],"lessons":[]} {"type":"discussion","id":"DSC-0016","status":"done","ticket":"tilemap-empty-cell-vs-tile-id-zero","title":"Tilemap Empty Cell vs Tile ID Zero","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0015","file":"workflow/agendas/AGD-0015-tilemap-empty-cell-vs-tile-id-zero.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[],"plans":[],"lessons":[{"id":"LSN-0022","file":"lessons/DSC-0016-tilemap-empty-cell-semantics/LSN-0022-tilemap-empty-cell-convergence.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} 
{"type":"discussion","id":"DSC-0017","status":"done","ticket":"asset-entry-metadata-normalization-contract","title":"Asset Entry Metadata Normalization Contract","created_at":"2026-03-27","updated_at":"2026-04-09","tags":[],"agendas":[{"id":"AGD-0016","file":"workflow/agendas/AGD-0016-asset-entry-metadata-normalization-contract.md","status":"done","created_at":"2026-03-27","updated_at":"2026-04-09"}],"decisions":[{"id":"DEC-0004","file":"workflow/decisions/DEC-0004-asset-entry-metadata-normalization-contract.md","status":"accepted","created_at":"2026-04-09","updated_at":"2026-04-09"}],"plans":[],"lessons":[{"id":"LSN-0023","file":"lessons/DSC-0017-asset-metadata-normalization/LSN-0023-typed-asset-metadata-helpers.md","status":"done","created_at":"2026-04-09","updated_at":"2026-04-09"}]} diff --git a/discussion/workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md b/discussion/workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md index fb21d410..2ab1b23b 100644 --- a/discussion/workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md +++ b/discussion/workflow/agendas/AGD-0013-perf-vm-allocation-and-copy-pressure.md @@ -2,10 +2,10 @@ id: AGD-0013 ticket: perf-vm-allocation-and-copy-pressure title: Agenda - [PERF] VM Allocation and Copy Pressure -status: open +status: accepted created: 2026-03-27 -resolved: -decision: +resolved: 2026-04-20 +decision: DEC-0018 tags: [] --- @@ -64,11 +64,44 @@ Definir o nivel de disciplina de alocacao/copia exigido do core da VM no baselin 2. Vale endurecer a linguagem/ABI para reduzir alocacao implicitamente? 3. Caminhos de fault precisam ser maximizados para desempenho ou apenas os caminhos felizes? +## Sugestoes para Fechar as Open Questions + +1. Strings devem ser tratadas como recurso suportado, mas caro por definicao no baseline. 
+ A VM hoje modela `Value::String(String)` como valor inline clonavel e o opcode `ADD` concatena via `format!`, entao o custo de copia ja faz parte do comportamento real do runtime. A sugestao nao e prometer "string cheap" na semantica base, e sim preservar string como capacidade legitima da linguagem enquanto o baseline otimiza os caminhos quentes que nao dependem de formatacao dinamica. Strings hardcoded podem ser materializadas uma vez na constant pool durante build/load; strings dinamicas podem ser materializadas em storage dinamico durante runtime. Em ambos os casos, a primeira materializacao e aceitavel. O que esta fora da meta e recopiar payload de string repetidamente no caminho quente depois dessa primeira materializacao. + +2. Vale endurecer o contrato operacional, mas nao a expressividade publica da linguagem neste primeiro passo. + A recomendacao e evitar uma mudanca ampla de ABI agora. Em vez disso: + - o caminho feliz do frame loop deve evitar alocacao implicita quando opera sobre numeros, handles e valores ja materializados; + - strings dinamicas devem continuar permitidas, mas tratadas como custo explicito de runtime; + - globals devem privilegiar handles e valores baratos no caminho quente, sem introduzir nova semantica publica para `GET_GLOBAL`; + - qualquer estrategia mais invasiva, como intern global, copy-on-write ou heap-string canonica, deve nascer como decisao posterior se a telemetria provar necessidade. + +3. Fault paths nao precisam ser maximizados como se fossem caminho quente. + A recomendacao e exigir que traps e faults permanecam corretos, deterministas e legiveis para tooling host-owned, mas sem contaminar o caminho feliz com formatacao defensiva ou montagem rica de strings em toda instrucao. O investimento principal deve ir para opcode dispatch, acesso a globals e operacoes repetidas por frame; faults podem aceitar custo maior desde que esse custo seja pago so na excecao. 
+ +## Sugestao / Recomendacao + +Fechar esta agenda com a seguinte linha: + +- baseline alvo: "happy path com alloc rara e explicita", nao "zero alloc absoluto"; +- prioridade imediata: reduzir copia/alocacao em `GET_GLOBAL` e nas operacoes de string mais frequentes do loop; +- strings constantes: custo de materializacao pago uma vez no carregamento/constant pool e depois preferencialmente referenciadas; +- strings dinamicas: custo inicial de criacao aceito em runtime, mas sem clones implicitos repetidos apos a primeira materializacao; +- `GET_GLOBAL`: nenhuma nova semantica publica; a reducao de copia deve vir de representacao interna e ownership dos tipos caros; +- ownership baseline: manter valores triviais por copia e mover payloads caros para representacoes por handle quando houver prova de pressao suficiente para justificar a complexidade; +- faults/logs: preservar clareza e determinismo, aceitando custo fora do caminho feliz; +- meta interna de engenharia: perseguir `zero alloc` no caminho feliz numerico e nos acessos quentes ja materializados, sem publicar isso como metrica de certificacao; +- instrumentacao canonica: medir `heap_used_bytes`, frequencia de GC e contagem de logs/faults no fim do frame, mantendo contadores de alocacao como metrica interna de engenharia, sem transformar o dispatch em microbenchmark intrusivo. + +## Perguntas em Aberto + +- Nenhuma no nivel arquitetural atual. A agenda pode ser encerrada quando esta direcao for promovida para decisao normativa. 
+ ## Dependencias -- `../specs/02a-vm-values-and-calling-convention.md` -- `../specs/03-memory-stack-heap-and-allocation.md` -- `../specs/10-debug-inspection-and-profiling.md` +- `docs/specs/runtime/02a-vm-values-and-calling-convention.md` +- `docs/specs/runtime/03-memory-stack-heap-and-allocation.md` +- `docs/specs/runtime/10-debug-inspection-and-profiling.md` ## Criterio de Saida Desta Agenda @@ -78,3 +111,13 @@ Pode virar PR quando houver decisao escrita sobre: - meta minima de alocacao/copia da VM; - estrategia de ownership para strings/values; - instrumentacao canonica para medir regressao. + +## Resolucao Proposta + +- Strings continuam parte legitima da linguagem, mas sao tratadas como recurso potencialmente caro no baseline. +- Strings hardcoded podem pagar custo de materializacao uma vez no carregamento/constant pool. +- Strings dinamicas podem pagar custo inicial de criacao em runtime. +- A arquitetura deve evitar recopia de payload apos a primeira materializacao sempre que o caminho quente puder operar por referencia, handle ou ownership interno mais barato. +- `GET_GLOBAL` nao ganha nova semantica publica; qualquer reducao de copia deve vir de mudancas internas de representacao e processo. +- `Zero alloc no caminho feliz` e meta interna de engenharia do runtime, nao criterio publicado de certificacao. +- Faults e logs permanecem corretos e legiveis, mas nao dirigem a otimizacao principal. 
diff --git a/discussion/workflow/decisions/DEC-0018-vm-allocation-and-copy-pressure-baseline.md b/discussion/workflow/decisions/DEC-0018-vm-allocation-and-copy-pressure-baseline.md new file mode 100644 index 00000000..144edd1b --- /dev/null +++ b/discussion/workflow/decisions/DEC-0018-vm-allocation-and-copy-pressure-baseline.md @@ -0,0 +1,112 @@ +--- +id: DEC-0018 +ticket: perf-vm-allocation-and-copy-pressure +title: Decision - [PERF] VM Allocation and Copy Pressure Baseline +status: in_progress +created: 2026-04-20 +accepted: 2026-04-20 +agenda: AGD-0013 +plans: [PLN-0033, PLN-0034, PLN-0035] +tags: [perf, runtime, vm, memory, strings, allocation] +--- + +# Status + +Accepted on 2026-04-20 and now in progress through linked implementation plans. + +# Contexto + +O core da VM ainda apresenta churn de alocacao e copia em caminhos relevantes, em especial: + +- concatenacao de string em `ADD`; +- clone de valores em `GET_GLOBAL`; +- montagem de strings dinamicas em faults e logs. + +O estado atual do runtime permite strings como parte legitima da linguagem, mas a representacao pratica ainda empurra payload caro para copias repetidas em alguns caminhos. A discussao da agenda fechou que o problema arquitetural nao e eliminar toda alocacao, e sim impedir recopia desnecessaria depois da primeira materializacao dos dados. + +# Decisao + +1. Strings SHALL remain part of the supported VM language surface. +2. The runtime MUST treat string payload as potentially expensive in the baseline performance model. +3. Hardcoded strings MAY pay one materialization cost during build/load into the constant pool, and runtime hot paths SHOULD preferentially reference that already-materialized representation. +4. Dynamically produced strings MAY pay an initial runtime materialization cost in dynamic storage, but the runtime MUST NOT introduce repeated implicit payload copies on hot paths unless the operation semantically requires a new string value. +5. 
`GET_GLOBAL` SHALL NOT gain new public semantics. Any reduction in copy pressure MUST come from internal representation, ownership strategy, or storage process, not from a guest-visible ABI change. +6. The baseline engineering target for the VM happy path SHALL be zero allocation on numeric and already-materialized hot paths. +7. That zero-allocation target SHALL remain an internal engineering objective and MUST NOT be published as a certification contract or guest-facing compatibility guarantee. +8. Fault and log paths MUST remain correct, deterministic, and readable, but they SHALL NOT drive the primary optimization strategy for the VM hot path. + +# Rationale + +Esta decisao fecha tres tensoes da agenda: + +- preserva string como capacidade real da linguagem, sem prometer que string e "cheap" por definicao; +- evita reabrir a ABI publica apenas para aliviar pressao de copia em um ponto interno da VM; +- separa meta de engenharia de contrato normativo externo. + +O custo inevitavel e a primeira materializacao: + +- string hardcoded entra na constant pool em build/load; +- string dinamica nasce em runtime. + +O custo evitavel e a recopia posterior do payload em caminhos quentes que ja deveriam operar sobre valor materializado, handle ou ownership mais barato. + +Essa linha tambem mantem a arquitetura coerente com as specs atuais: + +- valores triviais continuam baratos por copia; +- objetos com identidade e lifetime relevante continuam candidatos naturais a representacao por handle; +- tooling host-owned continua podendo observar heap e pressao de memoria sem que a VM precise transformar cada opcode em um ponto de instrumentacao intrusivo. + +# Invariantes / Contrato + +- The VM MUST preserve the public meaning of string values as guest-visible values. +- The VM MUST NOT change the public behavior of `GET_GLOBAL` as part of this decision. +- The runtime MUST distinguish first materialization from repeated implicit copy when evaluating optimization work. 
+- Constant-pool-backed strings SHOULD be treated as already materialized data for runtime access purposes. +- Runtime-created strings MAY allocate once at creation time. +- Hot-path operations over numeric values, handles, and already-materialized values SHOULD converge toward zero allocation. +- Any future move to heap-backed canonical strings, interning, copy-on-write, or similar global representation changes MUST be proposed in a separate decision if it changes operational constraints or implementation complexity materially. +- Fault generation and logs MAY allocate outside the happy path, provided they remain deterministic and do not leak into hot-loop design as a hidden steady-state cost. + +# Impactos + +- Spec + - No immediate guest-visible ABI change is required. + - Existing memory/value/debug specs remain directionally compatible. + - A follow-up plan may update wording in runtime specs to distinguish first materialization from repeated copy pressure more explicitly. + +- Runtime + - Optimization work MUST target internal ownership and representation first. + - `GET_GLOBAL` copy pressure MUST be addressed without semantic drift. + - String-heavy `ADD` and similar paths become explicit hot-path optimization candidates. + +- Host + - Certification and tooling remain host-owned. + - Allocation counters may exist as engineering telemetry, but they are not promoted to public certification criteria by this decision. + +- Firmware / Tooling + - No cartridge author contract changes are introduced. + - Compiler/loader/tooling MAY continue to materialize hardcoded strings into constant-pool storage. 
+ +# Referencias + +- `AGD-0013` +- `docs/specs/runtime/02a-vm-values-and-calling-convention.md` +- `docs/specs/runtime/03-memory-stack-heap-and-allocation.md` +- `docs/specs/runtime/10-debug-inspection-and-profiling.md` +- `crates/console/prometeu-vm/src/virtual_machine.rs` +- `crates/console/prometeu-system/src/virtual_machine_runtime/tick.rs` + +# Propagacao Necessaria + +- Create a plan that separates: + - hot-path ownership work in the VM core; + - telemetry/internal measurement work; + - any optional spec wording cleanup. +- Audit `GET_GLOBAL` and string-producing opcode paths under this contract. +- Define implementation evidence for "zero alloc on happy path" as internal profiling evidence rather than certification output. +- Do not reopen public ABI or language semantics during plan execution unless new evidence shows the current contract is insufficient. + +# Revision Log + +- 2026-04-20: Initial draft from AGD-0013. +- 2026-04-20: Accepted and linked to PLN-0033, PLN-0034, and PLN-0035.