This commit is contained in:
bQUARKz 2026-02-10 00:56:15 +00:00
parent 6eda2d21ac
commit c9b9f18b32
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
5 changed files with 29 additions and 109 deletions

View File

@ -255,14 +255,14 @@ impl Linker {
patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx); patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx);
} }
} }
// Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs // Branches are strictly function-relative. Do NOT relocate.
// The emitter encodes `target_rel = label - func_start` and the verifier enforces it.
OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => {
// For branches, immediate must be present and represents rel PC from function start
let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
"Invalid branch immediate at pc {}", "Invalid branch immediate at pc {}",
pc - code_offset pc - code_offset
)))?; )))?;
patch_u32_at(&mut combined_code, imm_start, &|cur| cur + (code_offset as u32)); // Intentionally no patch here.
} }
_ => {} _ => {}
} }
@ -628,16 +628,16 @@ mod tests {
// Module 2's code starts after module 1's code // Module 2's code starts after module 1's code
let module2_offset = code1.len() as u32; let module2_offset = code1.len() as u32;
// Verify that the JMP immediate equals original_target (0) + module2_offset // Verify that the JMP immediate remains function-relative (0), no relocation applied
let jmp_abs_pc = module2_offset as usize + jmp_pc as usize; let jmp_abs_pc = module2_offset as usize + jmp_pc as usize;
let jmp_imm_off = jmp_abs_pc + 2; // skip opcode let jmp_imm_off = jmp_abs_pc + 2; // skip opcode
let jmp_patched = u32::from_le_bytes(result.rom[jmp_imm_off..jmp_imm_off+4].try_into().unwrap()); let jmp_patched = u32::from_le_bytes(result.rom[jmp_imm_off..jmp_imm_off+4].try_into().unwrap());
assert_eq!(jmp_patched, module2_offset); assert_eq!(jmp_patched, 0);
// Verify that the conditional JMP immediate was relocated similarly // Verify that the conditional JMP immediate also remains function-relative (0)
let cjmp_abs_pc = module2_offset as usize + cjmp_pc as usize; let cjmp_abs_pc = module2_offset as usize + cjmp_pc as usize;
let cjmp_imm_off = cjmp_abs_pc + 2; let cjmp_imm_off = cjmp_abs_pc + 2;
let cjmp_patched = u32::from_le_bytes(result.rom[cjmp_imm_off..cjmp_imm_off+4].try_into().unwrap()); let cjmp_patched = u32::from_le_bytes(result.rom[cjmp_imm_off..cjmp_imm_off+4].try_into().unwrap());
assert_eq!(cjmp_patched, module2_offset); assert_eq!(cjmp_patched, 0);
} }
} }

View File

@ -139,7 +139,28 @@ impl Verifier {
let func_len = func_end_abs - func_start; let func_len = func_end_abs - func_start;
if target_rel > func_len { if target_rel > func_len {
return Err(VerifierError::InvalidJumpTarget { pc: func_start + pc, target: func_start + target_rel }); // Mandatory structured diagnostic for InvalidJumpTarget
let pc_abs = func_start + pc;
let opcode = instr.opcode;
let imm_raw = instr.imm_u32().unwrap();
let func_start_abs = func_start;
let func_end_abs_log = func_end_abs;
let target_abs_expected = func_start_abs + target_rel;
let is_boundary_target_rel = valid_pc.contains(&target_rel);
eprintln!(
"[VERIFIER] invalid jump: pc_abs={} opcode={:?} imm_raw={} func=F{} start={} end={} len={} target_rel={} target_abs_expected={} boundary_rel={}",
pc_abs,
opcode,
imm_raw,
func_idx,
func_start_abs,
func_end_abs_log,
func_len,
target_rel,
target_abs_expected,
is_boundary_target_rel
);
return Err(VerifierError::InvalidJumpTarget { pc: pc_abs, target: target_abs_expected });
} }
if target_rel == func_len { if target_rel == func_len {

View File

@ -11,107 +11,6 @@
* A **single canonical layout/decoder/spec** is used across compiler/linker/verifier/VM. * A **single canonical layout/decoder/spec** is used across compiler/linker/verifier/VM.
* Any invalid program fails with clear diagnostics, not panics. * Any invalid program fails with clear diagnostics, not panics.
**Estimation scale:**
* **1 point** = small / straightforward
* **3 points** = medium
* **5 points** = large
* Any work that feels bigger must be broken down into multiple PRs (≤ 5 points each).
---
## Phase 1 — Single Source of Truth: Bytecode Spec + Decoder (Highest ROI)
### PR-01 (3 pts) — Move OpcodeSpec to `prometeu-bytecode` and make it authoritative
**Briefing**
Today opcode metadata (imm sizes, stack effects, branch-ness, terminators) is duplicated and/or inconsistent across crates. This creates a perpetual maintenance nightmare.
**Target**
Create one authoritative opcode spec in **`prometeu-bytecode`** and delete/replace all “local” opcode knowledge.
**Scope**
* Create `prometeu-bytecode::opcode_spec` containing:
* `imm_bytes`
* `pops`, `pushes` (stack effect)
* `is_branch`, `is_terminator`
* optional: `name`, `category`
* Update callers to import from `prometeu-bytecode`.
**Requirements Checklist**
* [ ] There is exactly one canonical `OpcodeSpec` source.
* [ ] All crates compile against that source.
* [ ] No hardcoded operand sizes remain outside the spec.
**Completion Tests**
* [ ] Unit test enumerating all opcodes validates:
* every opcode has a spec
* `imm_bytes` is defined
---
### PR-02 (5 pts) — Introduce canonical decoder in `prometeu-bytecode` and migrate VM to it
**Briefing**
The VM currently has its own decoder. The linker and other tools decode manually. This must be centralized.
**Target**
Add a single canonical decoder in `prometeu-bytecode` that produces typed decoded instructions.
**Scope**
* Add `prometeu-bytecode::decoder`:
* `decode_next(pc, bytes) -> DecodedInstr`
* includes: opcode, pc, next_pc, raw immediate bytes slice
* helpers: `imm_u8/u16/u32/i32/i64/f64` with size validation
* Migrate VM to use `prometeu-bytecode::decoder`.
**Requirements Checklist**
* [ ] VM no longer has a bespoke decoder.
* [ ] No slicing-based immediate parsing in VM core paths.
* [ ] Decoder validates immediate sizes and fails deterministically.
**Completion Tests**
* [ ] Decoder unit tests for representative opcodes with each immediate size.
* [ ] Roundtrip test: encode→decode (table-driven; property test optional).
---
### PR-03 (3 pts) — Delete/neutralize `abi::operand_size` duplication
**Briefing**
`prometeu-bytecode/src/abi.rs` provides partial operand sizing that can drift from the canonical spec.
**Target**
Make all operand sizing derived from the opcode spec.
**Scope**
* Replace `operand_size()` with `OpcodeSpec::imm_bytes`.
* Remove or restrict legacy APIs that leak duplication.
**Requirements Checklist**
* [ ] There is no second operand-size table.
**Completion Tests**
* [ ] Test ensuring `operand_size()` (if retained) matches spec for all opcodes.
--- ---
## Phase 2 — Canonical Layout + Verifier Contract (JVM-like Control Flow) ## Phase 2 — Canonical Layout + Verifier Contract (JVM-like Control Flow)