This commit is contained in:
bQUARKz 2026-03-02 14:25:12 +00:00
parent d5ef8a2003
commit 2a79f641dd
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
6 changed files with 376 additions and 20 deletions

View File

@ -18,6 +18,6 @@ pub use assembler::{assemble, AsmError};
pub use decoder::{decode_next, DecodeError};
pub use disassembler::disassemble;
pub use layout::{compute_function_layouts, FunctionLayout};
pub use model::{BytecodeLoader, FunctionMeta, LoadError};
pub use model::{BytecodeLoader, FunctionMeta, LoadError, SyscallDecl};
pub use program_image::ProgramImage;
pub use value::{HeapRef, Value};

View File

@ -1,6 +1,7 @@
use crate::abi::SourceSpan;
use crate::opcode::OpCode;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
/// An entry in the Constant Pool.
///
@ -35,6 +36,9 @@ pub enum LoadError {
InvalidFunctionIndex,
MalformedHeader,
MalformedSection,
MissingSyscallSection,
DuplicateSyscallIdentity,
InvalidUtf8,
UnexpectedEof,
}
@ -60,6 +64,22 @@ pub struct Export {
pub func_idx: u32,
}
/// A single syscall declaration carried by a module's `SYSC` section.
///
/// The `(module, name, version)` triple acts as the declaration's identity:
/// module validation rejects an image that contains the same triple twice
/// with `LoadError::DuplicateSyscallIdentity`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SyscallDecl {
/// Module part of the identity (e.g. `"gfx"`). Serialized as UTF-8 bytes
/// prefixed by a little-endian `u16` byte length.
pub module: String,
/// Syscall name within `module` (e.g. `"draw_line"`). Serialized the same
/// way as `module`.
pub name: String,
/// Version part of the identity, serialized as a little-endian `u16`.
pub version: u16,
/// Declared argument slot count, serialized as a little-endian `u16`.
pub arg_slots: u16,
/// Declared return slot count, serialized as a little-endian `u16`.
pub ret_slots: u16,
}
// Section kind tags. Each section-table entry pairs one of these `u32` tags
// with the offset and length of the section payload. The loader skips kinds
// it does not recognise.
/// Constant pool section.
const SECTION_KIND_CONST_POOL: u32 = 0;
/// Function metadata section.
const SECTION_KIND_FUNCTIONS: u32 = 1;
/// Raw code bytes section.
const SECTION_KIND_CODE: u32 = 2;
/// Debug info section.
const SECTION_KIND_DEBUG: u32 = 3;
/// Exports section.
const SECTION_KIND_EXPORTS: u32 = 4;
/// Syscall declarations (`SYSC`) section. Unlike the other kinds it is always
/// emitted by the serializer, even with zero declarations, and the loader
/// fails with `LoadError::MissingSyscallSection` when it is absent.
const SECTION_KIND_SYSCALLS: u32 = 5;
/// Represents the final serialized format of a PBS v0 module.
///
/// This structure is a pure data container for the PBS format. It does NOT
@ -74,6 +94,7 @@ pub struct BytecodeModule {
pub code: Vec<u8>,
pub debug_info: Option<DebugInfo>,
pub exports: Vec<Export>,
pub syscalls: Vec<SyscallDecl>,
}
impl BytecodeModule {
@ -84,23 +105,25 @@ impl BytecodeModule {
let debug_data =
self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default();
let export_data = self.serialize_exports();
let syscall_data = self.serialize_syscalls();
let mut final_sections = Vec::new();
if !cp_data.is_empty() {
final_sections.push((0, cp_data));
final_sections.push((SECTION_KIND_CONST_POOL, cp_data));
}
if !func_data.is_empty() {
final_sections.push((1, func_data));
final_sections.push((SECTION_KIND_FUNCTIONS, func_data));
}
if !code_data.is_empty() {
final_sections.push((2, code_data));
final_sections.push((SECTION_KIND_CODE, code_data));
}
if !debug_data.is_empty() {
final_sections.push((3, debug_data));
final_sections.push((SECTION_KIND_DEBUG, debug_data));
}
if !export_data.is_empty() {
final_sections.push((4, export_data));
final_sections.push((SECTION_KIND_EXPORTS, export_data));
}
final_sections.push((SECTION_KIND_SYSCALLS, syscall_data));
let mut out = Vec::new();
// Magic "PBS\0"
@ -219,6 +242,26 @@ impl BytecodeModule {
}
data
}
/// Encodes the `SYSC` section payload for this module.
///
/// Layout (all integers little-endian):
/// - `u32` declaration count
/// - per declaration: `u16` module length + module bytes, `u16` name length +
///   name bytes, then `u16` version, `u16` arg_slots, `u16` ret_slots
///
/// # Panics
///
/// Panics if a module or syscall name is longer than `u16::MAX` bytes.
fn serialize_syscalls(&self) -> Vec<u8> {
    let declared = self.syscalls.len() as u32;
    let mut out = declared.to_le_bytes().to_vec();

    for decl in self.syscalls.iter() {
        let (module_bytes, name_bytes) = (decl.module.as_bytes(), decl.name.as_bytes());

        // Both lengths are checked up front so the `as u16` casts below
        // can never truncate.
        assert!(u16::try_from(module_bytes.len()).is_ok(), "SYSC module name exceeds u16 length");
        assert!(u16::try_from(name_bytes.len()).is_ok(), "SYSC syscall name exceeds u16 length");

        // Length-prefixed strings: module first, then name.
        for text in [module_bytes, name_bytes] {
            out.extend_from_slice(&(text.len() as u16).to_le_bytes());
            out.extend_from_slice(text);
        }

        // Fixed-size tail of the entry.
        for field in [decl.version, decl.arg_slots, decl.ret_slots] {
            out.extend_from_slice(&field.to_le_bytes());
        }
    }

    out
}
}
pub struct BytecodeLoader;
@ -296,35 +339,45 @@ impl BytecodeLoader {
code: Vec::new(),
debug_info: None,
exports: Vec::new(),
syscalls: Vec::new(),
};
let mut has_syscalls = false;
for (kind, offset, length) in sections {
let section_data = &bytes[offset as usize..(offset + length) as usize];
match kind {
0 => {
SECTION_KIND_CONST_POOL => {
// Const Pool
module.const_pool = parse_const_pool(section_data)?;
}
1 => {
SECTION_KIND_FUNCTIONS => {
// Functions
module.functions = parse_functions(section_data)?;
}
2 => {
SECTION_KIND_CODE => {
// Code
module.code = section_data.to_vec();
}
3 => {
SECTION_KIND_DEBUG => {
// Debug Info
module.debug_info = Some(parse_debug_section(section_data)?);
}
4 => {
SECTION_KIND_EXPORTS => {
// Exports
module.exports = parse_exports(section_data)?;
}
SECTION_KIND_SYSCALLS => {
module.syscalls = parse_syscalls(section_data)?;
has_syscalls = true;
}
_ => {} // Skip unknown or optional sections
}
}
if !has_syscalls {
return Err(LoadError::MissingSyscallSection);
}
// Additional validations
validate_module(&module)?;
@ -520,7 +573,74 @@ fn parse_exports(data: &[u8]) -> Result<Vec<Export>, LoadError> {
Ok(exports)
}
/// Parses the payload of a `SYSC` section into its syscall declarations.
///
/// Expected layout (all integers little-endian):
/// - `u32` declaration count
/// - per declaration: `u16` module length + UTF-8 module bytes, `u16` name
///   length + UTF-8 name bytes, then `u16` version, `u16` arg_slots,
///   `u16` ret_slots
///
/// # Errors
///
/// - `LoadError::MalformedSection` if the payload is shorter than the 4-byte
///   count, or if bytes remain after the last declared entry.
/// - `LoadError::UnexpectedEof` if an entry runs past the end of the payload.
/// - `LoadError::InvalidUtf8` if a module or syscall name is not valid UTF-8.
fn parse_syscalls(data: &[u8]) -> Result<Vec<SyscallDecl>, LoadError> {
    /// Smallest possible encoded entry: two empty length-prefixed strings
    /// plus the version / arg_slots / ret_slots triple.
    const MIN_ENTRY_LEN: usize = 2 + 2 + 6;

    /// Returns the next `len` bytes and advances the cursor past them.
    fn take<'a>(data: &'a [u8], pos: &mut usize, len: usize) -> Result<&'a [u8], LoadError> {
        let end = (*pos).checked_add(len).ok_or(LoadError::UnexpectedEof)?;
        let bytes = data.get(*pos..end).ok_or(LoadError::UnexpectedEof)?;
        *pos = end;
        Ok(bytes)
    }

    /// Reads a little-endian `u16` at the cursor.
    fn take_u16(data: &[u8], pos: &mut usize) -> Result<u16, LoadError> {
        let bytes = take(data, pos, 2)?;
        Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
    }

    /// Reads a `u16`-length-prefixed UTF-8 string at the cursor.
    fn take_str<'a>(data: &'a [u8], pos: &mut usize) -> Result<&'a str, LoadError> {
        let len = usize::from(take_u16(data, pos)?);
        let bytes = take(data, pos, len)?;
        std::str::from_utf8(bytes).map_err(|_| LoadError::InvalidUtf8)
    }

    if data.len() < 4 {
        return Err(LoadError::MalformedSection);
    }
    // Widening cast: u32 always fits in usize on 32- and 64-bit targets.
    let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;

    // `count` comes straight from the (untrusted) image. Never pre-allocate
    // more entries than the remaining payload could possibly contain, otherwise
    // a 4-byte section declaring u32::MAX entries would request a gigantic
    // allocation before the first bounds check runs.
    let max_entries = (data.len() - 4) / MIN_ENTRY_LEN;
    let mut syscalls = Vec::with_capacity(count.min(max_entries));

    let mut pos = 4;
    for _ in 0..count {
        let module = take_str(data, &mut pos)?.to_owned();
        let name = take_str(data, &mut pos)?.to_owned();
        let version = take_u16(data, &mut pos)?;
        let arg_slots = take_u16(data, &mut pos)?;
        let ret_slots = take_u16(data, &mut pos)?;

        syscalls.push(SyscallDecl { module, name, version, arg_slots, ret_slots });
    }

    // Trailing bytes mean the declared count does not match the payload.
    if pos != data.len() {
        return Err(LoadError::MalformedSection);
    }

    Ok(syscalls)
}
fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> {
let mut syscall_identities = HashSet::with_capacity(module.syscalls.len());
for syscall in &module.syscalls {
if !syscall_identities
.insert((syscall.module.clone(), syscall.name.clone(), syscall.version))
{
return Err(LoadError::DuplicateSyscallIdentity);
}
}
for func in &module.functions {
// Opcode stream bounds
if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() {
@ -591,6 +711,36 @@ mod tests {
h
}
/// Test fixture: a version-0 module in which every collection is empty and
/// no debug info is attached.
fn minimal_module() -> BytecodeModule {
    BytecodeModule {
        version: 0,
        const_pool: Vec::new(),
        functions: Vec::new(),
        code: Vec::new(),
        debug_info: None,
        exports: Vec::new(),
        syscalls: Vec::new(),
    }
}
/// Test helper: assembles a raw image from `(kind, payload)` pairs.
///
/// Output order is the header from `create_header`, then one 12-byte table
/// entry per section (`kind`, `offset`, `length`, each a little-endian
/// `u32`), then the payloads back to back in the order given.
fn build_pbs_with_sections(sections: Vec<(u32, Vec<u8>)>) -> Vec<u8> {
    let section_count = sections.len() as u32;
    let mut image = create_header(section_count);

    // First payload starts right after the section table.
    // NOTE(review): the literal 32 assumes `create_header` emits a 32-byte header — confirm.
    let mut next_offset = 32 + (section_count * 12);

    for (kind, payload) in sections.iter() {
        let payload_len = payload.len() as u32;
        let entry = [kind.to_le_bytes(), next_offset.to_le_bytes(), payload_len.to_le_bytes()];
        image.extend_from_slice(&entry.concat());
        next_offset += payload_len;
    }

    image.extend(sections.into_iter().flat_map(|(_, payload)| payload));
    image
}
#[test]
fn test_invalid_magic() {
let mut data = create_header(0);
@ -645,10 +795,10 @@ mod tests {
#[test]
fn test_invalid_function_code_offset() {
let mut data = create_header(2);
// Section 1: Functions, Kind 1, Offset 64, Length 20 (Header 4 + 1 entry 16)
let mut data = create_header(3);
// Section 1: Functions, Kind 1, Offset 80, Length 20 (Header 4 + 1 entry 16)
data.extend_from_slice(&1u32.to_le_bytes());
data.extend_from_slice(&64u32.to_le_bytes());
data.extend_from_slice(&80u32.to_le_bytes());
data.extend_from_slice(&20u32.to_le_bytes());
// Section 2: Code, Kind 2, Offset 128, Length 10
@ -656,25 +806,31 @@ mod tests {
data.extend_from_slice(&128u32.to_le_bytes());
data.extend_from_slice(&10u32.to_le_bytes());
// Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
data.extend_from_slice(&5u32.to_le_bytes());
data.extend_from_slice(&160u32.to_le_bytes());
data.extend_from_slice(&4u32.to_le_bytes());
data.resize(256, 0);
// Setup functions section
let func_data_start = 64;
let func_data_start = 80;
data[func_data_start..func_data_start + 4].copy_from_slice(&1u32.to_le_bytes()); // 1 function
let entry_start = func_data_start + 4;
data[entry_start..entry_start + 4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5
data[entry_start + 4..entry_start + 8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10
// 5 + 10 = 15 > 10 (code section length)
data[160..164].copy_from_slice(&0u32.to_le_bytes());
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex));
}
#[test]
fn test_invalid_const_index() {
let mut data = create_header(2);
// Section 1: Const Pool, Kind 0, Offset 64, Length 4 (Empty CP)
let mut data = create_header(3);
// Section 1: Const Pool, Kind 0, Offset 80, Length 4 (Empty CP)
data.extend_from_slice(&0u32.to_le_bytes());
data.extend_from_slice(&64u32.to_le_bytes());
data.extend_from_slice(&80u32.to_le_bytes());
data.extend_from_slice(&4u32.to_le_bytes());
// Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0)
@ -682,25 +838,105 @@ mod tests {
data.extend_from_slice(&128u32.to_le_bytes());
data.extend_from_slice(&6u32.to_le_bytes());
// Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
data.extend_from_slice(&5u32.to_le_bytes());
data.extend_from_slice(&160u32.to_le_bytes());
data.extend_from_slice(&4u32.to_le_bytes());
data.resize(256, 0);
// Setup empty CP
data[64..68].copy_from_slice(&0u32.to_le_bytes());
data[80..84].copy_from_slice(&0u32.to_le_bytes());
// Setup code with PushConst 0
data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes());
data[130..134].copy_from_slice(&0u32.to_le_bytes());
data[160..164].copy_from_slice(&0u32.to_le_bytes());
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex));
}
#[test]
fn test_valid_minimal_load() {
fn test_missing_sysc_section_is_rejected() {
let data = create_header(0);
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MissingSyscallSection));
}
/// A serialized minimal module (which carries an empty SYSC section) must
/// load back with every collection still empty.
#[test]
fn test_valid_minimal_load_with_empty_sysc() {
    let bytes = minimal_module().serialize();
    let loaded = BytecodeLoader::load(&bytes).unwrap();

    assert_eq!(loaded.version, 0);
    assert!(loaded.const_pool.is_empty());
    assert!(loaded.functions.is_empty());
    assert!(loaded.code.is_empty());
    assert!(loaded.syscalls.is_empty());
}
/// A single syscall declaration must survive serialize -> load unchanged.
#[test]
fn test_valid_sysc_roundtrip() {
    let module = BytecodeModule {
        syscalls: vec![SyscallDecl {
            module: "gfx".into(),
            name: "draw_line".into(),
            version: 1,
            arg_slots: 4,
            ret_slots: 0,
        }],
        ..minimal_module()
    };

    let bytes = module.serialize();
    let loaded = BytecodeLoader::load(&bytes).unwrap();

    assert_eq!(loaded.syscalls, module.syscalls);
}
/// A SYSC payload shorter than its 4-byte count field is malformed.
#[test]
fn test_malformed_sysc_section_is_rejected() {
    // Only three bytes: not enough to hold the u32 declaration count.
    let truncated_count = vec![1, 0, 0];
    let image = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, truncated_count)]);

    assert_eq!(BytecodeLoader::load(&image), Err(LoadError::MalformedSection));
}
/// A declaration whose module name is not valid UTF-8 must be rejected.
#[test]
fn test_invalid_utf8_in_sysc_section_is_rejected() {
    let sysc: Vec<u8> = [
        &1u32.to_le_bytes()[..], // declaration count
        &1u16.to_le_bytes()[..], // module length
        &[0xFF][..],             // module bytes: 0xFF is never valid UTF-8
        &1u16.to_le_bytes()[..], // name length
        &b"x"[..],               // name bytes
        &1u16.to_le_bytes()[..], // version
        &0u16.to_le_bytes()[..], // arg_slots
        &0u16.to_le_bytes()[..], // ret_slots
    ]
    .concat();

    let image = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, sysc)]);

    assert_eq!(BytecodeLoader::load(&image), Err(LoadError::InvalidUtf8));
}
/// Two declarations sharing the same (module, name, version) identity must
/// make the load fail.
#[test]
fn test_duplicate_sysc_identity_is_rejected() {
    let decl = SyscallDecl {
        module: "system".into(),
        name: "has_cart".into(),
        version: 1,
        arg_slots: 0,
        ret_slots: 1,
    };
    let module = BytecodeModule { syscalls: vec![decl.clone(), decl], ..minimal_module() };

    let bytes = module.serialize();

    assert_eq!(BytecodeLoader::load(&bytes), Err(LoadError::DuplicateSyscallIdentity));
}
}

View File

@ -118,6 +118,7 @@ impl From<ProgramImage> for BytecodeModule {
code: program.rom.as_ref().to_vec(),
debug_info: program.debug_info.clone(),
exports,
syscalls: Vec::new(),
}
}
}

View File

@ -38,6 +38,7 @@ pub fn generate() -> Result<()> {
function_names: vec![(0, "main".into())],
}),
exports: vec![Export { symbol: "main".into(), func_idx: 0 }],
syscalls: vec![],
};
let bytes = module.serialize();

View File

@ -268,6 +268,124 @@ Required behavior:
- patch before `Verifier::verify(...)`
## Implementation Plan
Implementation should be staged so each phase leaves the workspace in a coherent state and keeps the verifier/VM contract intact.
### Commit Checklist
1. `prometeu-bytecode: add SyscallDecl model and mandatory SYSC section`
2. `prometeu-bytecode: add SYSC parser validation and load errors`
3. `prometeu-bytecode: add phase-1 coverage for empty/valid/invalid SYSC`
4. `prometeu-bytecode: wire downstream constructors to new BytecodeModule.syscalls field`
5. `prometeu-bytecode: clean up naming/docs after phase-1 passes`
### Phase 1 - Extend PBX module format
Target crates:
- `crates/console/prometeu-bytecode`
Steps:
1. add `SyscallDecl` to `src/model.rs` and extend `BytecodeModule` with `syscalls: Vec<SyscallDecl>`
2. reserve a new section kind for `SYSC` in module serialization/deserialization
3. enforce the mandatory-section rule: valid PBS images always carry `SYSC`, including `count = 0`
4. reject malformed payloads, invalid UTF-8, and duplicate canonical identities during load
5. update `src/lib.rs` exports if needed so the VM and HAL can consume the new declarations
Checkpoint:
- `BytecodeLoader::load(...)` returns `BytecodeModule` with canonical syscall declarations preserved from PBX
### Phase 2 - Add pre-load opcode support
Target crates:
- `crates/console/prometeu-bytecode`
Steps:
1. add `HOSTCALL` to `src/opcode.rs` with a `u32` immediate
2. extend `src/opcode_spec.rs` and `src/decoder.rs` so the loader can scan and decode `HOSTCALL`
3. update `src/assembler.rs` and `src/disassembler.rs` so tests and fixtures can produce/read pre-load artifacts
4. keep the runtime contract explicit: `HOSTCALL` is representable in bytecode artifacts but must not survive loader patching
Checkpoint:
- bytecode tooling round-trips `HOSTCALL <index>` correctly, while runtime execution still depends on patched `SYSCALL <id>`
### Phase 3 - Bridge PBX declarations to host metadata
Target crates:
- `crates/console/prometeu-hal`
Steps:
1. add a resolver path that accepts program-owned syscall declarations instead of only `&'static str` identities
2. resolve each `(module, name, version)` against `src/syscalls.rs`
3. validate `arg_slots` and `ret_slots` against authoritative `SyscallMeta`
4. validate required capabilities against cartridge-derived `CapFlags`
5. return deterministic, load-facing errors for unknown syscalls, ABI mismatches, and missing capabilities
Design note:
- this phase likely needs a small owned-string adapter or a new helper alongside `resolve_program_syscalls(...)`, because PBX strings are runtime data, not `&'static str`
Checkpoint:
- given only `BytecodeModule.syscalls` plus granted `CapFlags`, the loader can produce a resolved table `sysc_index -> syscall_id`
### Phase 4 - Patch before verification
Target crates:
- `crates/console/prometeu-vm`
Steps:
1. add a load-time patching helper near `src/virtual_machine.rs` or a small dedicated module
2. run that helper immediately after `BytecodeLoader::load(...)` and before `Verifier::verify(...)`
3. scan `module.code`, decode every instruction, and rewrite `HOSTCALL <index>` into `SYSCALL <resolved_id>`
4. reject out-of-bounds `HOSTCALL` indices during the scan
5. track `SYSC` usage and reject declarations that are never referenced
6. assert that no `HOSTCALL` remains before handing code to the verifier
7. only then call `Verifier::verify(...)`, compute `max_stack_slots`, and build `ProgramImage::from(module)`
Why the sequencing matters:
- the current load path in `crates/console/prometeu-vm/src/virtual_machine.rs` verifies the raw module immediately after `BytecodeLoader::load(...)`
- `ProgramImage` currently stores only the final ROM/functions/constants and does not preserve a syscall declaration table, so patching must happen while the code is still a `BytecodeModule`
Checkpoint:
- the verifier sees only numeric `SYSCALL <id>` instructions, preserving the existing verifier and VM execution model
### Phase 5 - Tests and failure matrix
Target crates:
- `crates/console/prometeu-bytecode`
- `crates/console/prometeu-hal`
- `crates/console/prometeu-vm`
Steps:
1. add serialization/deserialization tests for missing, empty, valid, malformed, invalid UTF-8, and duplicate `SYSC`
2. add opcode tests for `HOSTCALL` decoding and assembler/disassembler coverage
3. add resolver tests for unknown identity, ABI mismatch, and capability mismatch
4. add VM load-path tests proving patch-before-verify behavior
5. add an assertion that final executable images contain only numeric `SYSCALL <id>` instructions and no residual `HOSTCALL`
Suggested PR slicing:
1. bytecode format + tests
2. `HOSTCALL` opcode plumbing
3. HAL resolution/ABI validation
4. VM loader patching
5. integration and regression tests
## Deterministic Load Errors
Load must fail for at least: