From b33dc7c18d1bfdc4cf7ebeb880156f497ca4302d Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Wed, 11 Mar 2026 05:14:17 +0000 Subject: [PATCH] canonical path for intrinsics --- ...kend VM Pipeline - Orchestration Agenda.md | 121 ----------- ...d Workshop 1 - IRBackend Input Contract.md | 157 -------------- ...orkshop 2 - LowerToVM and IRVM Contract.md | 165 --------------- ...code Marshaling and Runtime Conformance.md | 197 ------------------ docs/vm-arch/ARCHITECTURE.md | 184 ++++++++++++++++ docs/vm-arch/INTRINSICS.csv | 24 +++ docs/vm-arch/ISA_CORE.md | 134 ++++++++++++ .../backend/irvm/IRVMIntrinsicDefinition.java | 142 +++++++++++++ .../backend/irvm/IRVMIntrinsicRegistry.java | 84 ++------ .../main/resources/intrinsics/registry-v1.csv | 24 --- .../irvm/IRVMIntrinsicRegistryParityTest.java | 183 +++++++++++++++- 11 files changed, 677 insertions(+), 738 deletions(-) delete mode 100644 docs/pbs/agendas/18.0. Backend VM Pipeline - Orchestration Agenda.md delete mode 100644 docs/pbs/agendas/18.1. Backend Workshop 1 - IRBackend Input Contract.md delete mode 100644 docs/pbs/agendas/18.2. Backend Workshop 2 - LowerToVM and IRVM Contract.md delete mode 100644 docs/pbs/agendas/18.3. Backend Workshop 3 - Bytecode Marshaling and Runtime Conformance.md create mode 100644 docs/vm-arch/ARCHITECTURE.md create mode 100644 docs/vm-arch/INTRINSICS.csv create mode 100644 docs/vm-arch/ISA_CORE.md create mode 100644 prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicDefinition.java delete mode 100644 prometeu-compiler/prometeu-build-pipeline/src/main/resources/intrinsics/registry-v1.csv diff --git a/docs/pbs/agendas/18.0. Backend VM Pipeline - Orchestration Agenda.md b/docs/pbs/agendas/18.0. Backend VM Pipeline - Orchestration Agenda.md deleted file mode 100644 index 297f9ba2..00000000 --- a/docs/pbs/agendas/18.0. Backend VM Pipeline - Orchestration Agenda.md +++ /dev/null @@ -1,121 +0,0 @@ -# 18.0. 
Backend VM Pipeline - Orchestration Agenda - -Status: Ready for Closure (v1 Draft Text) - -## Purpose - -Definir a ordem de fechamento arquitetural para o backend executável: - -1. contrato de entrada `IRBackend` vindo do frontend, -2. lowering `IRBackend -> IRVM`, -3. marshaling `IRVM -> bytecode/PBX` executável no runtime. - -## Context - -- O pipeline já prevê `LowerToIRVMPipelineStage` e `EmitBytecodePipelineStage`, mas ambos estão vazios. -- O runtime já impõe contratos concretos de loader/verifier/VM que precisam ser respeitados pelo backend. -- Nesta rodada, `packer` está explicitamente fora de escopo. - -## Decisions to Produce - -1. Ordem oficial de workshops e critérios de saída por etapa. -2. Fronteiras normativas entre: - - frontend (`IRBackend`), - - backend (`IRVM` + emissão), - - runtime (loader/verifier/execução). -3. Critérios de “backend integration-ready” para um artefato bytecode mínimo rodar no runtime. -4. Estratégia de fechamento em docs (`agendas -> decisions -> specs -> PR`). - -## Core Questions - -1. Quais decisões precisam ser fechadas antes de qualquer implementação de `LowerToIRVM`? -2. Quais invariantes são obrigatórias na fronteira `IRBackend` para evitar ambiguidade no backend? -3. Quais invariantes são obrigatórias na fronteira `IRVM` para emissão determinística? -4. Quais verificações devem ocorrer no compilador antes de delegar ao loader/verifier do runtime? -5. Como organizar testes de integração sem introduzir dependência de packer? - -## Resolution (v1) - -Nao ha open questions bloqueantes remanescentes para fechamento da Agenda 18. - -As agendas `18.1`, `18.2` e `18.3` definem os contratos de entrada, lowering e marshaling. - -Esta agenda fecha a orquestracao end-to-end com pipeline canônico incluindo etapa de otimizacao de `IRVM`. - -## Normative Text (Draft for Closure) - -### 1) Pipeline canônico do backend executável - -O pipeline normativo v1 do backend executavel MUST seguir esta ordem: - -1. 
`FrontendPhase` produz `IRBackend`; -2. `LowerToIRVM` produz `IRVM`; -3. `OptimizeIRVM` transforma `IRVM -> IRVM` otimizado; -4. `EmitBytecode` transforma `IRVM` otimizado em `BytecodeModule` pre-load; -5. runtime loader aplica patching/resolucao (`HOSTCALL -> SYSCALL`) e verifier valida imagem final. - -### 2) Posicionamento de otimização - -1. A etapa `OptimizeIRVM` MUST ocorrer entre `LowerToIRVM` e `EmitBytecode`. -2. Otimizacao semantica/estrutural MUST ocorrer no backend (compile-time), nao no runtime. -3. Runtime permanece focado em load, validacao e execucao deterministica. - -### 3) Contratos de entrada e saida por etapa - -1. `LowerToIRVM` recebe `IRBackend` conforme Agenda `18.1` e MUST produzir `IRVM` conforme Agenda `18.2`. -2. `OptimizeIRVM` recebe `IRVM` valido para o `vm_profile` alvo e MUST produzir `IRVM` equivalente semanticamente. -3. `EmitBytecode` recebe `IRVM` (pos-otimizacao) sem opcodes internos nao-eliminados e MUST emitir artefato conforme Agenda `18.3`. - -### 4) Invariantes de seguranca da etapa OptimizeIRVM - -`OptimizeIRVM` MUST: - -1. preservar semantica observavel do programa; -2. preservar contratos de aridade/slots/retorno; -3. preservar determinismo para mesmo grafo de entrada; -4. nao introduzir opcodes fora do `vm_profile` executavel; -5. nao mover chamadas host/intrinsic para fronteira semantica incorreta. - -### 5) Criterio de aprovacao de pipeline - -Um backend so pode declarar fluxo "integration-ready" quando: - -1. `IRBackend -> IRVM -> IRVM otimizado -> BytecodeModule` estiver implementado; -2. fixtures Gate I cobrirem cenarios positivos e negativos definidos em `18.3`; -3. bytecode emitido inicializar e executar no runtime alvo sem packer. - -### 6) Deferrals explicitos - -Sem deferrals de perguntas abertas desta agenda. - -Permanecem fora de escopo: - -1. formato final de packer/cartucho; -2. politicas avancadas de otimizacao nao necessarias para conformidade v1; -3. redesign de loader/verifier/runtime. 
- -## Expected Spec Material - -1. Nova spec de contrato `IRBackend -> IRVM` (backend-facing). -2. Nova spec de contrato de `IRVM` (shape, invariantes, diagnóstico). -3. Atualização de `docs/general/specs/15. Bytecode and PBX Mapping Specification.md`. -4. Atualização de `docs/general/specs/19. Verification and Safety Checks Specification.md`. - -## Non-Goals - -- Definir formato final de packer/cartucho. -- Redesenhar ISA do runtime. -- Redesenhar loader/verifier do runtime. -- Fechar otimizações de backend (peephole, SSA, etc.). - -## Inputs - -- `prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/LowerToIRVMPipelineStage.java` -- `prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/OptimizeIRVMPipelineStage.java` -- `prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java` -- `docs/pbs/specs/13. Lowering IRBackend Specification.md` -- `docs/general/specs/15. Bytecode and PBX Mapping Specification.md` -- `docs/pbs/specs/6.2. Host ABI Binding and Loader Resolution Specification.md` -- `../runtime/docs/runtime/virtual-machine/ISA_CORE.md` -- `../runtime/crates/console/prometeu-vm/src/virtual_machine/loader.rs` -- `../runtime/crates/console/prometeu-vm/src/verifier.rs` diff --git a/docs/pbs/agendas/18.1. Backend Workshop 1 - IRBackend Input Contract.md b/docs/pbs/agendas/18.1. Backend Workshop 1 - IRBackend Input Contract.md deleted file mode 100644 index 80682fe1..00000000 --- a/docs/pbs/agendas/18.1. Backend Workshop 1 - IRBackend Input Contract.md +++ /dev/null @@ -1,157 +0,0 @@ -# 18.1. Backend Workshop 1 - IRBackend Input Contract - -Status: Ready for Closure (v1 Draft Text) - -## Purpose - -Fechar o `IRBackend` como contrato de entrada do backend executável, de forma suficientemente explícita para permitir lowering determinístico para `IRVM`. 
- -## Context - -- A spec atual de `IRBackend` cobre sobretudo a fronteira frontend e não fecha `IRBackend -> IRVM`. -- O runtime exige contratos rígidos para chamadas host-backed (`SYSC`/`HOSTCALL`) e VM-owned (`INTRINSIC`). -- O backend precisa receber um IR que preserve semântica e também fatos de artefato necessários para bytecode. - -## Decisions to Produce - -1. Shape mínima obrigatória de `IRBackend` para backend executável: - - identidade de módulo/arquivo/callable, - - assinatura (parâmetros/retorno), - - corpo executável em forma backend-lowerable, - - âncoras de source attribution. -2. Modelo de representação para operações VM-owned vs host-backed no `IRBackend`. -3. Contrato de metadados reservados obrigatório: - - host bindings canônicos `(module, name, version)`, - - metadados de intrinsics/builtin VM-owned, - - `requiredCapabilities` determinístico. -4. Regras de ordenação/deduplicação obrigatórias ainda no `IRBackend`. -5. Política de rejeição determinística para formas que não entram no backend executável. - -## Core Questions - -1. O `IRBackend` deve carregar código em forma estruturada (blocos/expressões) ou linearizada? -2. Quais informações de tipos precisam sobreviver para o backend sem “re-tipagem” implícita? -3. Como representar chamadas host-backed no `IRBackend`: - - por identidade canônica, - - por índice lógico, - - ou por ambos? -4. Como representar intrinsics VM-owned: - - identidade canônica apenas, - - id final já conhecido, - - ou modelo híbrido? -5. Quais invariantes de estabilidade de ordenação são obrigatórias para conformance? -6. Quais campos mínimos de span/source mapping são mandatórios para erros backend-atribuíveis? - -## Resolution (v1) - -Nao ha open questions bloqueantes para fechar o contrato de entrada `IRBackend` v1. - -As perguntas desta agenda ficam consideradas resolvidas para v1 pelos termos normativos abaixo. - -## Normative Text (Draft for Closure) - -### 1) Escopo e fronteira - -1. 
`IRBackend` e o contrato de entrada normativo do backend executavel. -2. O frontend MUST emitir `IRBackend` completo para toda unidade admitida no Gate U. -3. O backend MUST tratar `IRBackend` como fonte de verdade para `IRBackend -> IRVM`. -4. Esta agenda NAO define o formato de `IRVM` nem o encoding final de bytecode. - -### 2) Obrigacoes minimas por callable - -Para cada callable executavel emitido no `IRBackend`, o frontend MUST preservar: - -1. identidade estavel do callable no build atual; -2. assinatura observavel: - - aridade de entrada; - - shape de retorno observavel; -3. categoria de callable (ex.: funcao comum vs superficie reservada nao executavel); -4. ancora de atribuicao de fonte (`fileId`, `start`, `end`); -5. corpo backend-lowerable com semantica suficiente para emissao de chamadas e fluxo. - -`IRFunction` apenas com nome/contagem de parametros/span NAO satisfaz este contrato executavel. - -### 3) Classificacao obrigatoria de callsites - -Todo callsite executavel no `IRBackend` MUST estar classificado em exatamente uma categoria: - -1. `CALL_FUNC` (chamada de callable de programa); -2. `CALL_HOST` (fronteira host-backed); -3. `CALL_INTRINSIC` (operacao VM-owned). - -Implementacoes MUST NOT inferir a categoria por heuristica textual na fase `LowerToIRVM`. - -### 4) Contrato para host-backed - -Todo `CALL_HOST` MUST carregar identidade canonica: - -1. `abiModule`, -2. `abiMethod`, -3. `abiVersion`. - -Todo `CALL_HOST` MUST carregar declaracao de ABI: - -1. `arg_slots`, -2. `ret_slots`. - -O conjunto de bindings host-backed no `IRBackend` MUST ser deduplicado por `(module, name, version)` e ordenado por primeira ocorrencia deterministica. - -### 5) Contrato para VM-owned intrinsics e builtins - -Todo `CALL_INTRINSIC` MUST carregar identidade canonica VM-owned: - -1. `canonicalName`, -2. `canonicalVersion`. - -Builtin projections, builtin constants e intrinsic callsites VM-owned MUST NOT ser modelados como host binding. 
- -### 6) Metadados reservados obrigatorios - -`IRReservedMetadata` MUST preservar, no minimo: - -1. host bindings canonicos admitidos; -2. builtin type surfaces relevantes para lowering; -3. builtin const surfaces relevantes para lowering; -4. `requiredCapabilities` deterministico para assistencia de pipeline/packer. - -`requiredCapabilities` MUST ser deterministico para o mesmo grafo de entrada admitido. - -### 7) Diagnosticos e rejeicao deterministica - -Quando um modulo/callable nao satisfaz o contrato de entrada executavel: - -1. a rejeicao MUST ser deterministica; -2. o diagnostico MUST manter atribuicao de fonte acionavel; -3. o frontend/backend MUST NOT degradar silenciosamente para comportamento executavel diferente. - -### 8) Deferrals explicitos - -Ficam deferidos para agendas seguintes: - -1. shape estrutural completo de `IRVM` e algoritmo de lowering (Agenda 18.2); -2. layout/encoding final de `BytecodeModule` e marshaling PBX (Agenda 18.3); -3. estrategia de otimizacao de backend. - -## Expected Spec Material - -1. Atualização de `docs/pbs/specs/13. Lowering IRBackend Specification.md` com adendo backend-facing. -2. Decision record específico para “IRBackend executável v1”. -3. Fixture set de conformance Gate U focado no contrato de entrada do backend. - -## Non-Goals - -- Definir ainda o bytecode final. -- Definir políticas de otimização. -- Definir o formato final de debug/source map rico. -- Introduzir dependência de packer. - -## Inputs - -- `prometeu-compiler/prometeu-frontend-api/src/main/java/p/studio/compiler/models/IRBackend.java` -- `prometeu-compiler/prometeu-frontend-api/src/main/java/p/studio/compiler/models/IRBackendFile.java` -- `prometeu-compiler/prometeu-frontend-api/src/main/java/p/studio/compiler/models/IRFunction.java` -- `prometeu-compiler/prometeu-frontend-api/src/main/java/p/studio/compiler/models/IRReservedMetadata.java` -- `docs/pbs/specs/13. Lowering IRBackend Specification.md` -- `docs/pbs/specs/6.1. 
Intrinsics and Builtin Types Specification.md` -- `docs/pbs/specs/6.2. Host ABI Binding and Loader Resolution Specification.md` -- `../runtime/docs/runtime/decisions/005-v1-vm-owned-input-intrinsics-and-language-agnostic-surface.md` diff --git a/docs/pbs/agendas/18.2. Backend Workshop 2 - LowerToVM and IRVM Contract.md b/docs/pbs/agendas/18.2. Backend Workshop 2 - LowerToVM and IRVM Contract.md deleted file mode 100644 index d762f852..00000000 --- a/docs/pbs/agendas/18.2. Backend Workshop 2 - LowerToVM and IRVM Contract.md +++ /dev/null @@ -1,165 +0,0 @@ -# 18.2. Backend Workshop 2 - LowerToIRVM and IRVM Contract - -Status: Ready for Closure (v1 Draft Text) - -## Purpose - -Definir o contrato intermediário `IRVM` e as regras normativas de lowering `IRBackend -> IRVM`, garantindo que a emissão de bytecode seja mecânica e determinística. - -## Context - -- O runtime já define ISA e calling rules concretas. -- No runtime atual, jumps usam imediato `u32` absoluto relativo ao início da função. -- O verificador e o loader já têm validações rígidas que devem ser antecipadas pelo backend sempre que possível. - -## Decisions to Produce - -1. Modelo oficial de `IRVM`: - - unidades (módulo/função/bloco), - - instruções VM-like, - - referência a constantes/globais/locals/callables, - - metadados mínimos por função. -2. Regras de lowering para controle de fluxo: - - labels simbólicos, - - normalização de blocos, - - política de branch/jump target. -3. Regras de lowering para chamadas: - - `CALL`, - - `HOSTCALL` pré-load, - - `INTRINSIC` VM-owned. -4. Regras de layout de função e slots: - - `param_slots`, - - `local_slots`, - - `return_slots`. -5. Política de diagnósticos determinísticos no lowering para rejeições backend-originadas. - -## Core Questions - -1. `IRVM` permite pseudo-opcodes que não existem na ISA final? -2. Onde ocorre a resolução final de labels para offsets: no próprio lowering ou apenas no emitter? -3. 
Como garantir que cada função tenha finalização válida (`RET`/terminador) sem depender só do verificador? -4. Qual o limite de checagens estáticas no compiler vs checagens delegadas ao verifier do runtime? -5. Como representar efeitos de stack no `IRVM` para detectar underflow/mismatch antes de emitir bytes? -6. Como versionar `IRVM` em relação à linha de ISA do runtime selecionada? - -## Resolution (v1) - -Nao ha open questions bloqueantes remanescentes nesta agenda. - -As perguntas desta agenda ficam consideradas resolvidas para v1 pelos termos normativos abaixo. - -## Normative Text (Draft for Closure) - -### 1) Escopo e fronteira - -1. Esta agenda define o contrato intermediario `IRVM` e as regras obrigatorias de lowering `IRBackend -> IRVM`. -2. O objetivo do `IRVM` v1 e permitir emissao mecanica e deterministica de bytecode para runtime `core-v1`. -3. Esta agenda NAO define o marshaling binario final `IRVM -> BytecodeModule` (Agenda 18.3). - -### 2) Modelo de IRVM (quase-ISA estendido) - -1. `IRVM` v1 adota modelo quase-ISA: - - opcodes da Core ISA alvo; - - e opcodes internos estendidos (`IRVM_EXT`) quando necessarios para normalizacao/otimizacao. -2. `IRVM_EXT` MUST ser interno ao compilador e MUST NOT aparecer no artefato executavel final. -3. Todo opcode `IRVM_EXT` MUST declarar metadados estruturais equivalentes a spec de opcode: - - `pops`, - - `pushes`, - - `is_branch`, - - `is_terminator`. -4. O backend MUST expandir/reduzir todo `IRVM_EXT` para Core ISA antes da fase de emissao de bytecode. - -### 3) Controle de fluxo e labels - -1. O `IRVM` pode usar labels simbolicos internamente. -2. A resolucao final de labels para immediates `u32` de salto MUST ocorrer no `LowerToIRVM`. -3. O target final de salto MUST seguir contrato da VM: - - offset relativo ao inicio da funcao (`func_start + target_rel` em runtime). -4. Todo salto MUST apontar para fronteira valida de instrucao da funcao alvo. -5. 
Caminho alcancavel que cai no fim da funcao sem terminador MUST ser erro de compilacao. - -### 4) IDs deterministas de funcao - -1. O backend MUST produzir `func_id` deterministico para `CALL`, `SPAWN` e `MAKE_CLOSURE`. -2. Regra v1: - - entrypoint executavel no indice `0`; - - demais callables por ordenacao deterministica `(module_key, callable_name, source_start)`. -3. O mesmo grafo de entrada admitido MUST produzir os mesmos `func_id`. - -### 5) Contrato de chamadas - -1. `CALL` MUST referenciar `func_id` valido. -2. `SPAWN` MUST respeitar aridade exata da funcao de destino. -3. `CALL_HOST` no `IRBackend` MUST lower para `HOSTCALL ` em `IRVM` pre-load. -4. `CALL_INTRINSIC` no `IRBackend` MUST lower para `INTRINSIC ` em `IRVM`. -5. `CALL_CLOSURE` com callee nao determinavel estaticamente e proibido no v1. - -### 6) Contrato de slots por funcao - -Para cada funcao em `IRVM`, o backend MUST fixar: - -1. `param_slots`, -2. `local_slots`, -3. `return_slots`, -4. `max_stack_slots`. - -`RET` MUST ser consistente com `return_slots` em todos os caminhos alcancaveis. - -### 7) Pre-verificacao obrigatoria no compilador - -Antes de emitir bytecode, o compilador MUST executar pre-verificacao estrutural em `IRVM` (ja em Core ISA, sem `IRVM_EXT`), cobrindo no minimo: - -1. validade de target de saltos; -2. coerencia de altura de stack em joins; -3. ausencia de underflow e overflow contra `max_stack_slots`; -4. consistencia de `RET` com `return_slots`; -5. validade de `func_id` referenciado; -6. validade de aridade em `CALL`/`SPAWN`/`CALL_CLOSURE`; -7. regra de `YIELD` com stack vazia; -8. validade estrutural de `HOSTCALL`/`INTRINSIC` no shape de IR. - -O verificador do runtime permanece autoritativo e continua obrigatorio. - -### 8) Regra de YIELD - -`YIELD` com stack nao vazia MUST ser rejeitado no compilador como erro deterministico de lowering/verificacao. - -### 9) Versionamento do IRVM - -1. `IRVM` MUST carregar `vm_profile` explicito. -2. 
Para v1, o perfil normativo e `core-v1`. -3. Mismatch entre `vm_profile` e alvo de emissao/verificacao MUST falhar de forma deterministica. - -### 10) Deferrals explicitos - -Sem deferrals de perguntas abertas desta agenda. - -Permanecem fora de escopo por fronteira de agenda: - -1. formato binario final de `BytecodeModule` e seccionamento PBX (Agenda 18.3); -2. politicas avancadas de otimizacao nao necessarias para conformidade v1. - -## Expected Spec Material - -1. Nova spec: `IRBackend -> IRVM` lowering contract. -2. Nova spec: `IRVM` model e invariantes. -3. Tabela de mapeamento `IRVM op -> ISA op` por linha de runtime suportada. -4. Critérios de testes unitários do `LowerToIRVMPipelineStage`. - -## Non-Goals - -- Implementar ainda otimizações avançadas. -- Expandir ISA com novos opcodes. -- Resolver políticas de linking multi-artifact além do necessário para execução v1. -- Definir packer/cart assembly. - -## Inputs - -- `prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/LowerToIRVMPipelineStage.java` -- `../runtime/docs/runtime/virtual-machine/ISA_CORE.md` -- `../runtime/docs/runtime/specs/02-vm-instruction-set.md` -- `../runtime/docs/runtime/specs/02a-vm-values-and-calling-convention.md` -- `../runtime/crates/console/prometeu-bytecode/src/opcode.rs` -- `../runtime/crates/console/prometeu-bytecode/src/opcode_spec.rs` -- `../runtime/crates/console/prometeu-bytecode/src/decoder.rs` -- `../runtime/crates/console/prometeu-vm/src/verifier.rs` diff --git a/docs/pbs/agendas/18.3. Backend Workshop 3 - Bytecode Marshaling and Runtime Conformance.md b/docs/pbs/agendas/18.3. Backend Workshop 3 - Bytecode Marshaling and Runtime Conformance.md deleted file mode 100644 index 90a90d2f..00000000 --- a/docs/pbs/agendas/18.3. Backend Workshop 3 - Bytecode Marshaling and Runtime Conformance.md +++ /dev/null @@ -1,197 +0,0 @@ -# 18.3. 
Backend Workshop 3 - Bytecode Marshaling and Runtime Conformance - -Status: Ready for Closure (v1 Draft Text) - -## Purpose - -Fechar o contrato de emissão de bytecode/PBX a partir de `IRVM`, com conformidade explícita ao loader/verifier/runtime atual. - -## Context - -- O runtime já define layout de módulo, parser de seções e invariantes de load-time. -- `SYSC` é seção obrigatória (mesmo vazia), `HOSTCALL` é forma pré-load, `SYSCALL` final só após patching do loader. -- O verifier rejeita `HOSTCALL` não patchado e valida estrutura de controle/stack sobre imagem final. - -## Decisions to Produce - -1. Contrato de marshaling de `IRVM -> BytecodeModule`: - - `const_pool`, - - `functions`, - - `code`, - - `debug_info` (mínimo obrigatório), - - `exports`, - - `syscalls`. -2. Política de encoding de instruções: - - little-endian, - - `[opcode u16][immediate]`, - - encoding de saltos/calls conforme ISA. -3. Política de emissão de `SYSC`: - - deduplicação por identidade canônica, - - ordenação por primeira ocorrência, - - `arg_slots`/`ret_slots` coerentes com metadado host. -4. Contrato explícito “pre-load artifact vs executable image”: - - backend emite `HOSTCALL` quando host-backed, - - backend não emite `SYSCALL` bruto no artefato pré-load, - - intrinsics VM-owned seguem caminho distinto. -5. Estratégia de conformidade e regressão contra runtime real. - -## Core Questions - -1. Qual política oficial de ordenação de funções no bytecode (ordem de source, topo sort, entry-first)? -2. Como calcular `code_offset`/`code_len` por função em flatten final de `code`? -3. Quais hooks mínimos de source attribution serão emitidos no `debug_info` v1? -4. Quais validações o emitter faz antes de serializar vs o que é delegado ao loader/verifier? -5. Como padronizar mensagens/erros de marshaling para manter diagnóstico determinístico? -6. Qual suíte mínima de integração prova “bytecode emitido roda no runtime” sem packer? 
- -## Resolution (v1) - -Nao ha open questions bloqueantes remanescentes nesta agenda. - -As perguntas desta agenda ficam consideradas resolvidas para v1 pelos termos normativos abaixo. - -## Normative Text (Draft for Closure) - -### 1) Escopo e fronteira - -1. Esta agenda define o contrato normativo de marshaling `IRVM -> BytecodeModule` para runtime `core-v1`. -2. O backend executavel MUST produzir artefato pre-load compativel com loader e verifier do runtime. -3. Esta agenda nao define packer/cart assembly. - -### 2) Direcao JVM-inspired (adaptada) - -1. O backend adota modelo de referencia simbolica interna antes de indice numerico final: - - referencias simbolicas existem em `IRVM` e no emitter interno; - - indices finais (`func_id`, `sysc_index`, `intrinsic_id`) sao materializados de forma deterministica no marshaling. -2. O runtime continua numeric-only na execucao final. - -### 3) Contrato do BytecodeModule emitido - -O emitter MUST preencher: - -1. `const_pool`, -2. `functions`, -3. `code`, -4. `exports`, -5. `syscalls`, -6. `debug_info` conforme regra minima desta agenda. - -O emitter MUST gerar secao `SYSC` sempre presente (inclusive vazia), conforme contrato do loader. - -### 4) Politica de ordenacao e IDs - -1. A ordem de funcoes no artefato MUST seguir regra deterministica v1: - - entrypoint no indice `0`; - - demais funcoes por `(module_key, callable_name, source_start)`. -2. O mesmo grafo de entrada admitido MUST produzir os mesmos indices e offsets. -3. `SYSC` MUST ser deduplicado por `(module, name, version)` e ordenado por primeira ocorrencia. - -### 5) Layout de codigo por funcao - -1. O emitter SHOULD construir codigo por funcao (modelo method-local) e flatten apenas no passo final. -2. No artefato final: - - `code_offset` MUST ser crescente e unico por funcao; - - `code_len` MUST refletir exatamente o trecho da funcao; - - `code_offset + code_len` de cada funcao MUST estar dentro de `code.len`. -3. A concatenacao final MUST ser deterministica. 
- -### 6) Encoding de instrucao - -1. Encoding MUST ser little-endian. -2. Layout de instrucao MUST ser `[opcode: u16][immediate]`. -3. Saltos MUST usar imediato `u32` relativo ao inicio da funcao. -4. O emitter MUST respeitar tamanhos de imediato definidos pela Core ISA alvo. - -### 7) Pre-load vs executable image - -1. Para chamadas host-backed, o backend MUST emitir `HOSTCALL ` no artefato pre-load. -2. O backend MUST NOT emitir `SYSCALL ` bruto no artefato pre-load. -3. Chamadas VM-owned MUST seguir caminho de `INTRINSIC ` e permanecer distintas de `SYSC`/`HOSTCALL`/`SYSCALL`. -4. O backend MUST assumir que patch `HOSTCALL -> SYSCALL` e responsabilidade do loader. - -### 8) Coerencia ABI de host bindings - -1. Cada entrada `SYSC` MUST carregar `module`, `name`, `version`, `arg_slots`, `ret_slots`. -2. O emitter MUST validar coerencia de ABI declarada com metadado host conhecido no alvo de compilacao. -3. Mismatch detectavel no compile target MUST falhar no compilador antes da serializacao final. - -### 9) Debug minimo v1 (inspirado em line tables) - -1. `debug_info` v1 MUST incluir: - - `function_names` para todos os `func_idx` emitidos; - - `pc_to_span` para todo inicio de instrucao emitida. -2. O objetivo minimo e preservar atribuicao de fonte acionavel para falhas backend/runtime que permaneçam source-atribuiveis. - -### 10) Fronteira de validacao por camada - -Modelo inspirado em separacao JVM (format/link/verify), adaptado: - -1. emitter valida formato e invariantes de marshaling; -2. loader valida resolucao canonica, capability e ABI host; -3. verifier valida estrutura de controle e pilha na imagem patchada. - -O backend MUST executar prechecks de marshaling sem substituir a autoridade do loader/verifier. - -### 11) Taxonomia de erros de marshaling - -O backend MUST expor codigos estaveis para erros de marshaling em familias: - -1. `MARSHAL_FORMAT_*`, -2. `MARSHAL_LINKAGE_*`, -3. `MARSHAL_VERIFY_PRECHECK_*`. 
- -Quando atribuivel, diagnostico MUST incluir ancora de fonte primaria. - -### 12) Otimizacao: criterio de responsabilidade - -1. Otimizacao semantica/estrutural de programa MUST ocorrer no backend (compile-time). -2. Runtime MUST permanecer focado em carregamento, validacao e execucao deterministica. -3. Ajustes internos de runtime sao permitidos apenas se nao alterarem semantica do programa nem contrato observavel de bytecode. - -### 13) Suite minima Gate I (sem packer) - -A conformidade minima de integracao MUST incluir fixtures: - -1. `SYSC` presente e vazio (caso sem hostcall); -2. `HOSTCALL` valido com patch para `SYSCALL`; -3. `HOSTCALL` com indice fora de faixa; -4. `SYSC` declarado e nao usado; -5. `SYSCALL` bruto em artefato pre-load; -6. mismatch de ABI (`arg_slots`/`ret_slots`); -7. capability insuficiente em load-time; -8. caminho com `INTRINSIC` valido. - -### 14) Deferrals explicitos - -Sem deferrals de perguntas abertas desta agenda. - -Permanecem fora de escopo por fronteira: - -1. formato de packer/cartucho final; -2. superfices avancadas de simbolizacao alem do minimo v1. - -## Expected Spec Material - -1. Atualização de `docs/general/specs/15. Bytecode and PBX Mapping Specification.md`. -2. Atualização de `docs/general/specs/19. Verification and Safety Checks Specification.md`. -3. Decision record de “bytecode backend v1 marshaling contract”. -4. Plano de testes Gate I para backend executável. - -## Non-Goals - -- Definir fluxo de empacotamento final de cartucho. -- Alterar o contrato do loader/runtime. -- Definir formato final de symbolication completo. -- Cobrir perf tuning de emissor. 
- -## Inputs - -- `prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java` -- `../runtime/crates/console/prometeu-bytecode/src/model.rs` -- `../runtime/crates/console/prometeu-bytecode/src/assembler.rs` -- `../runtime/crates/console/prometeu-bytecode/src/program_image.rs` -- `../runtime/crates/console/prometeu-vm/src/virtual_machine/loader.rs` -- `../runtime/crates/console/prometeu-vm/src/vm_init_error.rs` -- `../runtime/crates/console/prometeu-vm/src/verifier.rs` -- `docs/pbs/specs/6.2. Host ABI Binding and Loader Resolution Specification.md` -- `docs/general/specs/15. Bytecode and PBX Mapping Specification.md` diff --git a/docs/vm-arch/ARCHITECTURE.md b/docs/vm-arch/ARCHITECTURE.md new file mode 100644 index 00000000..4f03840b --- /dev/null +++ b/docs/vm-arch/ARCHITECTURE.md @@ -0,0 +1,184 @@ +Prometeu VM Runtime — Canonical Architecture + +Status: canonical + +This document is the authoritative architectural reference for the Prometeu VM runtime. It reflects the implementation as it exists today and defines the invariants that govern architectural changes in the VM layer. + +Scope boundary: + +- PROMETEU itself is a fantasy handheld / fantasy console with a broader machine model, firmware model, cartridge model, and virtual hardware surface. +- This document does not define the whole PROMETEU machine. +- This document defines the VM/runtime subsystem that executes bytecode inside that machine. +- For broader machine-level framing, see [`../specs/README.md`](../specs/README.md). + +Document roles: + +- This file is normative for VM/runtime architecture. +- Detailed domain specifications may live under `docs/runtime/specs/`, but they must not contradict this document where VM/runtime invariants are concerned. +- Roadmaps, agendas, and PR proposals may discuss future changes, but they are not authoritative until this document is updated. 
+- The machine-wide fantasy console framing lives in the runtime specs manual and related domain specs; those documents are complementary, not competing VM architecture sources. + +Maintenance rule: + +- Any PR that changes VM/runtime architectural invariants must update this document in the same change. + + +1. Overview +----------- + +- Stack‑based virtual machine + - Operand stack + call frames; bytecode is fetched from a ROM/program image with a separate constant pool. +- GC‑managed heap + - Non‑compacting mark–sweep collector; stable object handles (`HeapRef`) while live. Sweep invalidates unreachable handles; objects are never moved. +- Closures (Model B) + - First‑class closures with a heap‑allocated environment. The closure object is passed to the callee as a hidden `arg0` when invoking a closure. +- Cooperative coroutines + - Deterministic, cooperative scheduling. Switching and GC occur only at explicit safepoints (`FRAME_SYNC`). +- Unified syscall ABI + - PBX pre-load artifacts declare canonical host bindings in `SYSC` and encode call sites as `HOSTCALL <sysc_index>`. The loader resolves and patches them to numeric `SYSCALL <id>` before verification/execution. Capability gating is enforced at load and checked again defensively at runtime. Syscalls are not first‑class values. + + +2. Memory Model +---------------- + +2.1 Stack vs Heap + +- Stack + - Each running context has an operand stack plus call frames (locals, return bookkeeping). Primitive values (integers, floats, booleans) reside on the stack. Heap objects are referenced by opaque `HeapRef` values on the stack. + - The VM’s current operand stack and frames are GC roots. + +- Heap + - The heap stores runtime objects that require identity and reachability tracking. Handles are `HeapRef` indices into an internal object store. + - The collector is mark–sweep, non‑moving: it marks from roots, then reclaims unreachable objects without relocating survivors. Indices for live objects remain stable across collections. 
+ +2.2 Heap Object Kinds (as used today) + +- Arrays of `Value` + - Variable‑length arrays whose elements may contain further `HeapRef`s. +- Closures + - Carry a function identifier and a captured environment (a slice/vector of `Value`s stored with the closure). Captured `HeapRef`s are traversed by the GC. +- Coroutines + - Heap‑resident coroutine records (state + wake time + suspended operand stack and call frames). These act as GC roots when suspended. + +Notes: +- Literals like strings and numbers are sourced from the constant pool in the program image; heap allocation is only used for runtime objects (closures, arrays, coroutine records, and any future heap kinds). The constant pool never embeds raw `HeapRef`s. + +2.3 GC Roots + +- VM roots + - Current operand stack and call frames of the running coroutine (or main context). +- Suspended coroutines + - All heap‑resident, suspended coroutine objects are treated as roots. Their saved stacks/frames are scanned during marking. +- Root traversal + - The VM exposes a root‑visitor that walks the operand stack, frames, and coroutine records to feed the collector. The collector then follows children from each object kind (e.g., array elements, closure environments, coroutine stacks). + + +3. Execution Model +------------------- + +3.1 Interpreter Loop + +- The VM runs a classic fetch–decode–execute loop over the ROM’s bytecode. The current program counter (PC), operand stack, and call frames define execution state. +- Function calls establish new frames; returns restore the caller’s frame and adjust the operand stack to the callee’s declared return slot count (the verifier enforces this shape statically). +- Errors + - Traps (well‑defined fault conditions) surface as trap reasons; panics indicate internal consistency failures. The VM can report logical frame endings such as `FrameSync`, `BudgetExhausted`, `Halted`, end‑of‑ROM, `Breakpoint`, `Trap(code, …)`, and `Panic(msg)`. 
+ +3.2 Safepoints + +- `FRAME_SYNC` is the only safepoint. + - At `FRAME_SYNC`, the VM performs two actions in a well‑defined order: + 1) Garbage‑collection opportunity: root enumeration + mark–sweep. + 2) Scheduler handoff: the currently running coroutine may yield/sleep, and a next ready coroutine is selected deterministically. +- No other opcode constitutes a GC or scheduling safepoint. Syscalls do not implicitly trigger GC or rescheduling. + +3.3 Scheduler Behavior (Cooperative Coroutines) + +- Coroutines are cooperative and scheduled deterministically (FIFO among ready coroutines). +- `YIELD` and `SLEEP` take effect at `FRAME_SYNC`: + - `YIELD` places the current coroutine at the end of the ready queue. + - `SLEEP` parks the current coroutine until its exact `wake_tick`, after which it re‑enters the ready queue at the correct point. +- `SPAWN` creates a new coroutine with its own stack/frames recorded in the heap and enqueues it deterministically. +- No preemption: the VM never interrupts a coroutine between safepoints. + + +4. Verification Model +---------------------- + +4.1 Verifier Responsibilities + +The verifier statically checks bytecode for structural safety and stack‑shape correctness. Representative checks include: + +- Instruction well‑formedness + - Unknown opcode, truncated immediates/opcodes, malformed function boundaries, trailing bytes. +- Control‑flow integrity + - Jump targets within bounds and to instruction boundaries; functions must have proper terminators; path coverage ensures a valid exit. +- Stack discipline + - No underflow/overflow relative to declared max stack; consistent stack height at control‑flow joins; `RET` occurs at the expected height. +- Call/return shape + - Direct calls and returns must match the declared argument counts and return slot counts. Mismatches are rejected. +- Syscalls + - The verifier runs only on the patched executable image. `HOSTCALL` is invalid at verification time. 
Final `SYSCALL` IDs must exist per `SyscallMeta`, and arity/declared return slot counts must match metadata. +- Closures + - `CALL_CLOSURE` is only allowed on closure values; the callee function must be known; argument counts for closure calls must match. +- Coroutines + - `YIELD` context must be valid; `SPAWN` argument counts are validated. + +4.2 Runtime vs Verifier Guarantees + +- The verifier guarantees structural correctness and stack‑shape invariants. It does not perform full type checking of value contents; dynamic checks (e.g., numeric domain checks, polymorphic comparisons, concrete syscall argument validation) occur at runtime and may trap. +- Capability gating for syscalls is enforced at load from cartridge capability flags and checked again at runtime by the VM/native interface. + + +5. Closures (Model B) — Calling Convention +------------------------------------------- + +- Creation + - `MAKE_CLOSURE` captures N values from the operand stack into a heap‑allocated environment alongside a function identifier. The opcode pushes a `HeapRef` to the new closure. +- Call + - `CALL_CLOSURE` invokes a closure. The closure object itself is supplied to the callee as a hidden `arg0`. User‑visible arguments follow the function’s declared arity. +- Access to captures + - The callee can access captured values via the closure’s environment. Captured `HeapRef`s are traced by the GC. + + +6. Unified Syscall ABI +----------------------- + +- Identification + - Host bindings are declared canonically as `(module, name, version)` in PBX `SYSC`, then executed as numeric IDs after loader patching. Syscalls are not first‑class values. +- Metadata‑driven + - `SyscallMeta` defines expected arity and return slot counts. The loader resolves `HOSTCALL` against this metadata and rejects raw `SYSCALL` in PBX pre-load artifacts; the verifier checks final IDs/arity/return‑slot counts against the same metadata. 
+- Arguments and returns + - Arguments are taken from the operand stack in the order defined by the ABI. Returns use multi‑slot results via a host‑side return buffer (`HostReturn`) which the VM copies back onto the stack, or zero slots for “void”. A mismatch in result counts is a fault/panic per current hardening logic. +- Capabilities + - Cartridge capability flags are applied before load-time host resolution. Missing required capability aborts load; invoking a syscall without the required capability also traps defensively at runtime. + + +7. Garbage Collection +---------------------- + +- Collector + - Non‑moving mark–sweep. +- Triggers + - GC runs only at `FRAME_SYNC` safepoints. +- Liveness + - Roots comprise: the live VM stack/frames and all suspended coroutines. The collector traverses object‑specific children (array elements, closure environments, coroutine stacks). +- Determinism + - GC opportunities and scheduling order are tied to `FRAME_SYNC`, ensuring repeatable execution traces across runs with the same inputs. + + +8. Non‑Goals +------------- + +- No RC +- No HIP +- No preemption +- No mailbox + + +9. Notes for Contributors +-------------------------- + +- Keep the public surface minimal and metadata‑driven (e.g., syscalls via `SyscallMeta`). +- Do not assume implicit safepoints; schedule and GC only at `FRAME_SYNC`. +- When adding new opcodes or object kinds, extend the verifier and GC traversal accordingly (children enumeration, environment scanning, root sets). +- Update this document alongside any architectural change that affects runtime invariants. 
diff --git a/docs/vm-arch/INTRINSICS.csv b/docs/vm-arch/INTRINSICS.csv new file mode 100644 index 00000000..74b3d3ef --- /dev/null +++ b/docs/vm-arch/INTRINSICS.csv @@ -0,0 +1,24 @@ +final_id_hex,final_id_dec,canonical_name,canonical_version,owner,name,arg_slots,ret_slots,arg_layout,ret_layout,deterministic,may_allocate +0x1000,4096,vec2.dot,1,vec2,dot,4,1,float|float|float|float,float,true,false +0x1001,4097,vec2.length,1,vec2,length,2,1,float|float,float,true,false +0x2000,8192,input.pad,1,input,pad,0,1,,builtin:input.pad,true,false +0x2001,8193,input.touch,1,input,touch,0,1,,builtin:input.touch,true,false +0x2010,8208,input.pad.up,1,input.pad,up,1,1,builtin:input.pad,builtin:input.button,true,false +0x2011,8209,input.pad.down,1,input.pad,down,1,1,builtin:input.pad,builtin:input.button,true,false +0x2012,8210,input.pad.left,1,input.pad,left,1,1,builtin:input.pad,builtin:input.button,true,false +0x2013,8211,input.pad.right,1,input.pad,right,1,1,builtin:input.pad,builtin:input.button,true,false +0x2014,8212,input.pad.a,1,input.pad,a,1,1,builtin:input.pad,builtin:input.button,true,false +0x2015,8213,input.pad.b,1,input.pad,b,1,1,builtin:input.pad,builtin:input.button,true,false +0x2016,8214,input.pad.x,1,input.pad,x,1,1,builtin:input.pad,builtin:input.button,true,false +0x2017,8215,input.pad.y,1,input.pad,y,1,1,builtin:input.pad,builtin:input.button,true,false +0x2018,8216,input.pad.l,1,input.pad,l,1,1,builtin:input.pad,builtin:input.button,true,false +0x2019,8217,input.pad.r,1,input.pad,r,1,1,builtin:input.pad,builtin:input.button,true,false +0x201A,8218,input.pad.start,1,input.pad,start,1,1,builtin:input.pad,builtin:input.button,true,false +0x201B,8219,input.pad.select,1,input.pad,select,1,1,builtin:input.pad,builtin:input.button,true,false +0x2020,8224,input.touch.button,1,input.touch,button,1,1,builtin:input.touch,builtin:input.button,true,false +0x2021,8225,input.touch.x,1,input.touch,x,1,1,builtin:input.touch,int,true,false 
+0x2022,8226,input.touch.y,1,input.touch,y,1,1,builtin:input.touch,int,true,false +0x2030,8240,input.button.pressed,1,input.button,pressed,1,1,builtin:input.button,bool,true,false +0x2031,8241,input.button.released,1,input.button,released,1,1,builtin:input.button,bool,true,false +0x2032,8242,input.button.down,1,input.button,down,1,1,builtin:input.button,bool,true,false +0x2033,8243,input.button.hold,1,input.button,hold,1,1,builtin:input.button,int,true,false diff --git a/docs/vm-arch/ISA_CORE.md b/docs/vm-arch/ISA_CORE.md new file mode 100644 index 00000000..b254cbd6 --- /dev/null +++ b/docs/vm-arch/ISA_CORE.md @@ -0,0 +1,134 @@ +### Prometeu Bytecode — Core ISA + +Status: bytecode-level normative + +This document defines the stable Core ISA surface for the Prometeu Virtual Machine at the bytecode level. It specifies instruction encoding, the stack evaluation model, and the instruction set currently available in the canonical opcode surface used by encoder, decoder, disassembler, assembler, verifier, and VM execution. + +Machine boundary: + +- PROMETEU is not "just the VM". It is the broader fantasy console/handheld machine. +- This document covers only the bytecode ISA of the VM subsystem embedded in that machine. + +Authority rule: + +- This document is normative for bytecode-level encoding and opcode surface. +- Runtime-wide invariants still live in [`ARCHITECTURE.md`](ARCHITECTURE.md). +- If a bytecode-level rule here conflicts with runtime architecture, the conflict must be resolved explicitly in both documents; neither should drift silently. + +#### Encoding Rules + +- Endianness: Little‑endian. +- Instruction layout: `[opcode: u16][immediate: spec.imm_bytes]`. +- Opcodes are defined in `prometeu_bytecode::isa::core::CoreOpCode`. +- Immediate sizes and stack effects are defined by `CoreOpCode::spec()` returning `CoreOpcodeSpec`. +- All jump immediates are absolute u32 byte offsets from the start of the current function. 
+ +#### Stack Machine Model + +- The VM is stack‑based. Unless noted, operands are taken from the top of the operand stack and results are pushed back. +- Types at the bytecode level are represented by the `Value` enum; the VM may perform numeric promotion where appropriate (e.g., `Int32 + Float -> Float`). +- Stack underflow is a trap (TRAP_STACK_UNDERFLOW). +- Some operations may trap for other reasons (e.g., division by zero, invalid indices, type mismatches). + +#### Instruction Set (Core) + +- Execution control: + - `NOP` — no effect. + - `HALT` — terminates execution (block terminator). + - `JMP u32` — unconditional absolute jump (block terminator). + - `JMP_IF_FALSE u32` — pops `[bool]`, jumps if false. + - `JMP_IF_TRUE u32` — pops `[bool]`, jumps if true. + - `TRAP` — software trap/breakpoint (block terminator). + +- Stack manipulation: + - `PUSH_CONST u32` — load constant by index → pushes `[value]`. + - `PUSH_I64 i64`, `PUSH_F64 f64`, `PUSH_BOOL u8`, `PUSH_I32 i32` — push literals. + - `POP` — pops 1. + - `POP_N u32` — pops N. + - `DUP` — `[x] -> [x, x]`. + - `SWAP` — `[a, b] -> [b, a]`. + +- Arithmetic: + - `ADD`, `SUB`, `MUL`, `DIV`, `MOD` — binary numeric ops. + - `NEG` — unary numeric negation. + +- Comparison and logic: + - `EQ`, `NEQ`, `LT`, `LTE`, `GT`, `GTE` — comparisons → `[bool]`. + - `AND`, `OR`, `NOT` — boolean logic. + - `BIT_AND`, `BIT_OR`, `BIT_XOR`, `SHL`, `SHR` — integer bit operations. + +- Variables: + - `GET_GLOBAL u32`, `SET_GLOBAL u32` — access global slots. + - `GET_LOCAL u32`, `SET_LOCAL u32` — access local slots (current frame). + +- Functions and scopes: + - `CALL u32` — call by function index; argument/result arity per function metadata. + - `RET` — return from current function (block terminator). + - `MAKE_CLOSURE u32,u32` — create closure from `(fn_id, capture_count)`. + - `CALL_CLOSURE u32` — invoke closure with `arg_count` user arguments. + +- Concurrency: + - `SPAWN u32,u32` — create coroutine for `(fn_id, arg_count)`. 
+ - `YIELD` — request cooperative yield at the next safepoint. + - `SLEEP u32` — request suspension for a logical tick duration. + +- System/Timing: + - `HOSTCALL u32` — PBX pre-load host binding call by `SYSC` table index; the loader must resolve and rewrite it before verification or execution. + - `SYSCALL u32` — final numeric platform call in the executable image; raw `SYSCALL` in PBX pre-load artifacts is rejected by the loader. + - `INTRINSIC u32` — final numeric VM-owned intrinsic call. + - `FRAME_SYNC` — yield until the next frame boundary (e.g., vblank); explicit safepoint. + +#### Canonical Intrinsic Registry Artifact + +- Final intrinsic IDs and intrinsic stack metadata are published in [`INTRINSICS.csv`](INTRINSICS.csv). +- This CSV is the ISA-scoped artifact intended to be consumed by compiler/tooling consumers such as `../studio`. +- Each row defines one canonical intrinsic identity and its final numeric ID. +- `canonical_name` is the fully qualified intrinsic identity seen by compiler-side intrinsic pools. +- `arg_slots` and `ret_slots` are the real stack effect contract for verifier/lowering consumers. +- `arg_layout` and `ret_layout` use `|`-separated ABI atoms: + - `int` + - `float` + - `bool` + - `builtin:<name>` +- Rows must remain unique by both `(canonical_name, canonical_version)` and `final_id_hex` / `final_id_dec`. +- Rows must remain deterministically ordered by final ID. + +For exact immediates and stack effects, see `CoreOpCode::spec()` which is the single source of truth used by the decoder, disassembler, and verifier. + +#### Canonical Decoder Contract + +- The canonical decoder is `prometeu_bytecode::decode_next(pc, bytes)`. +- It uses the Core ISA spec to determine immediate size and the canonical `next_pc`. +- Unknown or legacy opcodes must produce a deterministic `UnknownOpcode` error. + +#### Module Boundary + +- Core ISA lives under `prometeu_bytecode::isa::core` and re‑exports: + - `CoreOpCode` — the opcode enum of the core profile. 
+ - `CoreOpcodeSpec` and `CoreOpCodeSpecExt` — spec with `imm_bytes`, stack effects, and flags. +- Consumers (encoder/decoder/disasm/verifier) should import from this module to avoid depending on internal layout. + +#### Scope Notes + +- "Core ISA" in the current repository means the canonical opcode surface implemented by the runtime today. +- It includes closures, coroutines, `HOSTCALL` patching semantics, `INTRINSIC`, and `FRAME_SYNC`. +- It does not, by itself, define higher-level runtime policy such as crash taxonomy, firmware behavior, cartridge lifecycle, or host service organization. Those belong to the canonical runtime architecture and related specs. + +#### FRAME_SYNC — Semantics and Placement (Bytecode Level) + +- Semantics: + - `FRAME_SYNC` is a zero-operand instruction and does not modify the operand stack. + - It marks a VM safepoint for GC and the cooperative scheduler. In `CoreOpcodeSpec` this is exposed as `spec.is_safepoint == true`. + - On execution, the VM may suspend the current coroutine until the next frame boundary and/or perform GC. After resuming, execution continues at the next instruction. + +- Placement rules (representable and checkable): + - `FRAME_SYNC` may appear anywhere inside a function body where normal instructions can appear. It is NOT a block terminator (`spec.is_terminator == false`). + - Instruction boundaries are canonical: encoders/emitters must only place `FRAME_SYNC` at valid instruction PCs. The verifier already enforces “jump-to-boundary” and end-exclusive `[start, end)` function ranges using the canonical layout routine. + - Entrypoints that represent a render/update loop SHOULD ensure at least one reachable `FRAME_SYNC` along every long-running path to provide deterministic safepoints for GC/scheduling. This policy is semantic and may be enforced by higher-level tooling; at the bytecode level it is representable via `spec.is_safepoint` and can be counted by static analyzers. 
+ +- Disassembly: + - Disassemblers must print the mnemonic `FRAME_SYNC` verbatim for this opcode. + - Tools MAY optionally annotate it as a safepoint in comments, e.g., `FRAME_SYNC ; safepoint`. + +- Verification notes: + - The bytecode verifier treats `FRAME_SYNC` as a normal instruction with no stack effect and no control-flow targets. It is permitted before `RET`, between basic blocks, and as the last instruction of a function. Jumps targeting the function end (`pc == end`) remain valid under the end-exclusive rule. diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicDefinition.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicDefinition.java new file mode 100644 index 00000000..7bb916fa --- /dev/null +++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicDefinition.java @@ -0,0 +1,142 @@ +package p.studio.compiler.backend.irvm; + +import java.util.Objects; + +enum IRVMIntrinsicDefinition { + VEC2_DOT("vec2.dot", 1, 0x1000, "vec2", "dot", 4, 1, "float|float|float|float", "float", true, false), + VEC2_LENGTH("vec2.length", 1, 0x1001, "vec2", "length", 2, 1, "float|float", "float", true, false), + INPUT_PAD("input.pad", 1, 0x2000, "input", "pad", 0, 1, "", "builtin:input.pad", true, false), + INPUT_TOUCH("input.touch", 1, 0x2001, "input", "touch", 0, 1, "", "builtin:input.touch", true, false), + INPUT_PAD_UP("input.pad.up", 1, 0x2010, "input.pad", "up", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_DOWN("input.pad.down", 1, 0x2011, "input.pad", "down", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_LEFT("input.pad.left", 1, 0x2012, "input.pad", "left", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_RIGHT("input.pad.right", 1, 0x2013, "input.pad", "right", 1, 1, "builtin:input.pad", "builtin:input.button", 
true, false), + INPUT_PAD_A("input.pad.a", 1, 0x2014, "input.pad", "a", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_B("input.pad.b", 1, 0x2015, "input.pad", "b", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_X("input.pad.x", 1, 0x2016, "input.pad", "x", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_Y("input.pad.y", 1, 0x2017, "input.pad", "y", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_L("input.pad.l", 1, 0x2018, "input.pad", "l", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_R("input.pad.r", 1, 0x2019, "input.pad", "r", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_START("input.pad.start", 1, 0x201A, "input.pad", "start", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_PAD_SELECT("input.pad.select", 1, 0x201B, "input.pad", "select", 1, 1, "builtin:input.pad", "builtin:input.button", true, false), + INPUT_TOUCH_BUTTON("input.touch.button", 1, 0x2020, "input.touch", "button", 1, 1, "builtin:input.touch", "builtin:input.button", true, false), + INPUT_TOUCH_X("input.touch.x", 1, 0x2021, "input.touch", "x", 1, 1, "builtin:input.touch", "int", true, false), + INPUT_TOUCH_Y("input.touch.y", 1, 0x2022, "input.touch", "y", 1, 1, "builtin:input.touch", "int", true, false), + INPUT_BUTTON_PRESSED("input.button.pressed", 1, 0x2030, "input.button", "pressed", 1, 1, "builtin:input.button", "bool", true, false), + INPUT_BUTTON_RELEASED("input.button.released", 1, 0x2031, "input.button", "released", 1, 1, "builtin:input.button", "bool", true, false), + INPUT_BUTTON_DOWN("input.button.down", 1, 0x2032, "input.button", "down", 1, 1, "builtin:input.button", "bool", true, false), + INPUT_BUTTON_HOLD("input.button.hold", 1, 0x2033, "input.button", "hold", 1, 1, "builtin:input.button", "int", true, false); + + private final String canonicalName; + private final long 
canonicalVersion; + private final int finalId; + private final String owner; + private final String name; + private final int argSlots; + private final int retSlots; + private final String argLayout; + private final String retLayout; + private final boolean deterministic; + private final boolean mayAllocate; + + IRVMIntrinsicDefinition( + final String canonicalName, + final long canonicalVersion, + final int finalId, + final String owner, + final String name, + final int argSlots, + final int retSlots, + final String argLayout, + final String retLayout, + final boolean deterministic, + final boolean mayAllocate) { + this.canonicalName = requireNonBlank(canonicalName, "canonicalName"); + this.canonicalVersion = requireNonNegative(canonicalVersion, "canonicalVersion"); + this.finalId = finalId; + this.owner = requireNonBlank(owner, "owner"); + this.name = requireNonBlank(name, "name"); + this.argSlots = requireNonNegative(argSlots, "argSlots"); + this.retSlots = requireNonNegative(retSlots, "retSlots"); + this.argLayout = Objects.requireNonNull(argLayout, "argLayout"); + this.retLayout = Objects.requireNonNull(retLayout, "retLayout"); + this.deterministic = deterministic; + this.mayAllocate = mayAllocate; + } + + String canonicalName() { + return canonicalName; + } + + long canonicalVersion() { + return canonicalVersion; + } + + int finalId() { + return finalId; + } + + String owner() { + return owner; + } + + String intrinsicName() { + return name; + } + + int argSlots() { + return argSlots; + } + + int retSlots() { + return retSlots; + } + + String argLayout() { + return argLayout; + } + + String retLayout() { + return retLayout; + } + + boolean deterministic() { + return deterministic; + } + + boolean mayAllocate() { + return mayAllocate; + } + + String canonicalIdentity() { + return canonicalName + "@" + canonicalVersion; + } + + private static String requireNonBlank( + final String value, + final String fieldName) { + final var normalized = 
Objects.requireNonNull(value, fieldName); + if (normalized.isBlank()) { + throw new IllegalArgumentException(fieldName + " must not be blank"); + } + return normalized; + } + + private static int requireNonNegative( + final int value, + final String fieldName) { + if (value < 0) { + throw new IllegalArgumentException(fieldName + " must be non-negative"); + } + return value; + } + + private static long requireNonNegative( + final long value, + final String fieldName) { + if (value < 0) { + throw new IllegalArgumentException(fieldName + " must be non-negative"); + } + return value; + } +} diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistry.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistry.java index 226d5cd7..151b70cf 100644 --- a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistry.java +++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistry.java @@ -1,15 +1,10 @@ package p.studio.compiler.backend.irvm; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.OptionalInt; final class IRVMIntrinsicRegistry { - private static final String REGISTRY_RESOURCE = "/intrinsics/registry-v1.csv"; private static final Map FINAL_ID_BY_INTRINSIC = loadRegistry(); private IRVMIntrinsicRegistry() { @@ -37,77 +32,28 @@ final class IRVMIntrinsicRegistry { } private static Map loadRegistry() { - final var stream = IRVMIntrinsicRegistry.class.getResourceAsStream(REGISTRY_RESOURCE); - if (stream == null) { - throw new IllegalStateException("missing intrinsic registry resource: " + REGISTRY_RESOURCE); - } - final var registry = new HashMap(); final var identityByFinalId = new HashMap(); - try 
(final var reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) { - String rawLine; - int lineNumber = 0; - Integer previousFinalId = null; - while ((rawLine = reader.readLine()) != null) { - lineNumber++; - final var line = rawLine.trim(); - if (line.isBlank() || line.startsWith("#")) { - continue; - } - final var columns = line.split(",", -1); - if (columns.length != 3) { - throw new IllegalStateException("invalid intrinsic registry row at line " + lineNumber + ": " + line); - } - final var canonicalName = columns[0].trim(); - final long canonicalVersion; - try { - canonicalVersion = Long.parseLong(columns[1].trim()); - } catch (NumberFormatException ex) { - throw new IllegalStateException("invalid intrinsic version at line " + lineNumber + ": " + line, ex); - } - if (canonicalName.isBlank()) { - throw new IllegalStateException("blank intrinsic name at line " + lineNumber + ": " + line); - } - if (canonicalVersion < 0) { - throw new IllegalStateException("negative intrinsic version at line " + lineNumber + ": " + line); - } - final int finalId = parseFinalId(columns[2].trim(), lineNumber, line); - if (previousFinalId != null && Integer.compareUnsigned(finalId, previousFinalId) <= 0) { - throw new IllegalStateException( - "intrinsic final ids must be strictly increasing for deterministic evolution at line " - + lineNumber + ": " + line); - } - register(registry, canonicalName, canonicalVersion, finalId); - final var identity = canonicalIdentity(canonicalName, canonicalVersion); - final var previous = identityByFinalId.putIfAbsent(finalId, identity); - if (previous != null && !previous.equals(identity)) { - throw new IllegalStateException( - "duplicate intrinsic final id 0x%08X for %s and %s".formatted(finalId, previous, identity)); - } - previousFinalId = finalId; + Integer previousFinalId = null; + for (final var definition : IRVMIntrinsicDefinition.values()) { + final var finalId = definition.finalId(); + if (previousFinalId != 
null && Integer.compareUnsigned(finalId, previousFinalId) <= 0) { + throw new IllegalStateException( + "intrinsic final ids must be strictly increasing for deterministic evolution: " + + definition.canonicalIdentity()); } - } catch (IOException ex) { - throw new IllegalStateException("failed to read intrinsic registry resource: " + REGISTRY_RESOURCE, ex); + register(registry, definition.canonicalName(), definition.canonicalVersion(), finalId); + final var identity = definition.canonicalIdentity(); + final var previous = identityByFinalId.putIfAbsent(finalId, identity); + if (previous != null && !previous.equals(identity)) { + throw new IllegalStateException( + "duplicate intrinsic final id 0x%08X for %s and %s".formatted(finalId, previous, identity)); + } + previousFinalId = finalId; } return Map.copyOf(registry); } - private static int parseFinalId( - final String rawId, - final int lineNumber, - final String line) { - try { - if (rawId.startsWith("0x") || rawId.startsWith("0X")) { - return Integer.parseUnsignedInt(rawId.substring(2), 16); - } - return Integer.parseUnsignedInt(rawId, 10); - } catch (NumberFormatException ex) { - throw new IllegalStateException( - "invalid intrinsic final id at line %d: %s".formatted(lineNumber, line), - ex); - } - } - private static String canonicalIdentity( final String canonicalName, final long canonicalVersion) { diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/resources/intrinsics/registry-v1.csv b/prometeu-compiler/prometeu-build-pipeline/src/main/resources/intrinsics/registry-v1.csv deleted file mode 100644 index 1f1da908..00000000 --- a/prometeu-compiler/prometeu-build-pipeline/src/main/resources/intrinsics/registry-v1.csv +++ /dev/null @@ -1,24 +0,0 @@ -# canonicalName,canonicalVersion,finalId -vec2.dot,1,0x1000 -vec2.length,1,0x1001 -input.pad,1,0x2000 -input.touch,1,0x2001 -input.pad.up,1,0x2010 -input.pad.down,1,0x2011 -input.pad.left,1,0x2012 -input.pad.right,1,0x2013 -input.pad.a,1,0x2014 
-input.pad.b,1,0x2015 -input.pad.x,1,0x2016 -input.pad.y,1,0x2017 -input.pad.l,1,0x2018 -input.pad.r,1,0x2019 -input.pad.start,1,0x201A -input.pad.select,1,0x201B -input.touch.button,1,0x2020 -input.touch.x,1,0x2021 -input.touch.y,1,0x2022 -input.button.pressed,1,0x2030 -input.button.released,1,0x2031 -input.button.down,1,0x2032 -input.button.hold,1,0x2033 diff --git a/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistryParityTest.java b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistryParityTest.java index 021f9f56..d24aef8e 100644 --- a/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistryParityTest.java +++ b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMIntrinsicRegistryParityTest.java @@ -7,17 +7,21 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Locale; import java.util.regex.Pattern; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; class IRVMIntrinsicRegistryParityTest { - private static final Path RUNTIME_INTRINSICS_FILE = Path.of("../runtime/crates/console/prometeu-vm/src/builtins.rs"); + private static final String VM_ARCH_INTRINSICS_PATH = "docs/vm-arch/INTRINSICS.csv"; + private static final String RUNTIME_INTRINSICS_PATH = "crates/console/prometeu-vm/src/builtins.rs"; private static final Pattern OWNER_PATTERN = Pattern.compile("owner:\\s*\"([^\"]+)\","); private static final Pattern NAME_PATTERN = Pattern.compile("name:\\s*\"([^\"]+)\","); private static final 
Pattern VERSION_PATTERN = Pattern.compile("version:\\s*(\\d+),"); @@ -25,7 +29,7 @@ class IRVMIntrinsicRegistryParityTest { private static final String STRICT_PARITY_ENV = "PROMETEU_INTRINSIC_PARITY_STRICT"; @Test - void registryResourceMustExposeExpectedCanonicalEntries() { + void registryDefinitionsMustExposeExpectedCanonicalEntries() { final var snapshot = IRVMIntrinsicRegistry.snapshotByCanonicalIdentity(); assertFalse(snapshot.isEmpty()); assertEquals(23, snapshot.size()); @@ -33,19 +37,69 @@ class IRVMIntrinsicRegistryParityTest { assertEquals(0x2033, snapshot.get("input.button.hold@1")); } + @Test + void registryDefinitionsMustStayInSyncWithVmArchCsv() throws IOException { + final var specPath = locateRepoRoot().resolve(VM_ARCH_INTRINSICS_PATH); + assertTrue(Files.isRegularFile(specPath), "intrinsics spec is missing: " + specPath); + + final var expected = parseVmArchIntrinsicRows(specPath); + final var actual = Arrays.stream(IRVMIntrinsicDefinition.values()) + .map(IntrinsicCsvRow::fromDefinition) + .toList(); + assertEquals(expected, actual); + } + @Test void registryMustStayInSyncWithRuntimeBuiltinsTable() throws IOException { final var compilerSnapshot = IRVMIntrinsicRegistry.snapshotByCanonicalIdentity(); - if (!Files.exists(RUNTIME_INTRINSICS_FILE)) { + final var runtimeIntrinsicsFile = locateRuntimeIntrinsicsFile(); + if (!Files.exists(runtimeIntrinsicsFile)) { if (strictParityEnabled()) { - fail("runtime intrinsic table not found: " + RUNTIME_INTRINSICS_FILE.toAbsolutePath()); + fail("runtime intrinsic table not found: " + runtimeIntrinsicsFile.toAbsolutePath()); } return; } - final var runtimeSnapshot = parseRuntimeIntrinsicSnapshot(RUNTIME_INTRINSICS_FILE); + final var runtimeSnapshot = parseRuntimeIntrinsicSnapshot(runtimeIntrinsicsFile); assertEquals(runtimeSnapshot, compilerSnapshot, parityDiff(runtimeSnapshot, compilerSnapshot)); } + private List parseVmArchIntrinsicRows(final Path specPath) throws IOException { + final var lines = 
Files.readAllLines(specPath, StandardCharsets.UTF_8); + final var rows = new ArrayList(); + for (int index = 0; index < lines.size(); index++) { + final var rawLine = lines.get(index); + final var line = rawLine.trim(); + if (line.isBlank()) { + continue; + } + if (index == 0) { + assertEquals( + "final_id_hex,final_id_dec,canonical_name,canonical_version,owner,name,arg_slots,ret_slots,arg_layout,ret_layout,deterministic,may_allocate", + line, + "unexpected intrinsic csv header"); + continue; + } + final var columns = line.split(",", -1); + if (columns.length != 12) { + fail("invalid intrinsics csv row at line " + (index + 1) + ": " + rawLine); + } + rows.add(new IntrinsicCsvRow( + normalizeHexId(columns[0]), + parseUnsignedInt(columns[1], "final_id_dec", index + 1), + requireNonBlank(columns[2], "canonical_name", index + 1), + parseLong(columns[3], "canonical_version", index + 1), + requireNonBlank(columns[4], "owner", index + 1), + requireNonBlank(columns[5], "name", index + 1), + parseInt(columns[6], "arg_slots", index + 1), + parseInt(columns[7], "ret_slots", index + 1), + columns[8].trim(), + columns[9].trim(), + parseBoolean(columns[10], "deterministic", index + 1), + parseBoolean(columns[11], "may_allocate", index + 1))); + } + return List.copyOf(rows); + } + private Map parseRuntimeIntrinsicSnapshot(final Path runtimeFile) throws IOException { final var runtimeSource = Files.readAllLines(runtimeFile, StandardCharsets.UTF_8); final var mapped = new LinkedHashMap(); @@ -109,6 +163,27 @@ class IRVMIntrinsicRegistryParityTest { return Map.copyOf(mapped); } + private Path locateRepoRoot() { + var cursor = Path.of(System.getProperty("user.dir")).toAbsolutePath().normalize(); + while (cursor != null) { + if (Files.isDirectory(cursor.resolve("docs")) + && Files.isDirectory(cursor.resolve("prometeu-compiler")) + && Files.exists(cursor.resolve("settings.gradle.kts"))) { + return cursor; + } + cursor = cursor.getParent(); + } + fail("could not locate repository root 
from working directory"); + throw new IllegalStateException("unreachable"); + } + + private Path locateRuntimeIntrinsicsFile() { + return locateRepoRoot() + .getParent() + .resolve("runtime") + .resolve(RUNTIME_INTRINSICS_PATH); + } + private int parseRuntimeId(final String rawId) { if (rawId.startsWith("0x") || rawId.startsWith("0X")) { return Integer.parseUnsignedInt(rawId.substring(2), 16); @@ -116,6 +191,73 @@ class IRVMIntrinsicRegistryParityTest { return Integer.parseUnsignedInt(rawId, 10); } + private String requireNonBlank( + final String value, + final String fieldName, + final int lineNumber) { + final var normalized = value.trim(); + if (normalized.isBlank()) { + fail("blank " + fieldName + " at line " + lineNumber); + } + return normalized; + } + + private int parseInt( + final String rawValue, + final String fieldName, + final int lineNumber) { + try { + return Integer.parseInt(rawValue.trim()); + } catch (NumberFormatException ex) { + fail("invalid " + fieldName + " at line " + lineNumber + ": " + rawValue); + throw new IllegalStateException("unreachable", ex); + } + } + + private long parseLong( + final String rawValue, + final String fieldName, + final int lineNumber) { + try { + return Long.parseLong(rawValue.trim()); + } catch (NumberFormatException ex) { + fail("invalid " + fieldName + " at line " + lineNumber + ": " + rawValue); + throw new IllegalStateException("unreachable", ex); + } + } + + private int parseUnsignedInt( + final String rawValue, + final String fieldName, + final int lineNumber) { + try { + return Integer.parseUnsignedInt(rawValue.trim(), 10); + } catch (NumberFormatException ex) { + fail("invalid " + fieldName + " at line " + lineNumber + ": " + rawValue); + throw new IllegalStateException("unreachable", ex); + } + } + + private boolean parseBoolean( + final String rawValue, + final String fieldName, + final int lineNumber) { + return switch (rawValue.trim().toLowerCase(Locale.ROOT)) { + case "true" -> true; + case "false" -> 
false; + default -> { + fail("invalid " + fieldName + " at line " + lineNumber + ": " + rawValue); + yield false; + } + }; + } + + private String normalizeHexId(final String rawValue) { + final var trimmed = rawValue.trim(); + final var parsed = parseRuntimeId(trimmed); + return "0x" + Integer.toUnsignedString(parsed, 16).toUpperCase(Locale.ROOT); + } + private boolean strictParityEnabled() { final var strict = System.getenv(STRICT_PARITY_ENV); if (isTruthy(strict)) { @@ -160,4 +302,35 @@ class IRVMIntrinsicRegistryParityTest { + "\nmissingInRuntime=" + List.copyOf(missingInRuntime) + "\nidMismatches=" + List.copyOf(mismatchedIds); } + + private record IntrinsicCsvRow( + String finalIdHex, + int finalIdDec, + String canonicalName, + long canonicalVersion, + String owner, + String name, + int argSlots, + int retSlots, + String argLayout, + String retLayout, + boolean deterministic, + boolean mayAllocate) { + + private static IntrinsicCsvRow fromDefinition(final IRVMIntrinsicDefinition definition) { + return new IntrinsicCsvRow( + "0x" + Integer.toUnsignedString(definition.finalId(), 16).toUpperCase(Locale.ROOT), + definition.finalId(), + definition.canonicalName(), + definition.canonicalVersion(), + definition.owner(), + definition.intrinsicName(), + definition.argSlots(), + definition.retSlots(), + definition.argLayout(), + definition.retLayout(), + definition.deterministic(), + definition.mayAllocate()); + } + } }