pr 2.1
This commit is contained in:
parent
d5ef8a2003
commit
2a79f641dd
@ -18,6 +18,6 @@ pub use assembler::{assemble, AsmError};
|
|||||||
pub use decoder::{decode_next, DecodeError};
|
pub use decoder::{decode_next, DecodeError};
|
||||||
pub use disassembler::disassemble;
|
pub use disassembler::disassemble;
|
||||||
pub use layout::{compute_function_layouts, FunctionLayout};
|
pub use layout::{compute_function_layouts, FunctionLayout};
|
||||||
pub use model::{BytecodeLoader, FunctionMeta, LoadError};
|
pub use model::{BytecodeLoader, FunctionMeta, LoadError, SyscallDecl};
|
||||||
pub use program_image::ProgramImage;
|
pub use program_image::ProgramImage;
|
||||||
pub use value::{HeapRef, Value};
|
pub use value::{HeapRef, Value};
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
use crate::abi::SourceSpan;
|
use crate::abi::SourceSpan;
|
||||||
use crate::opcode::OpCode;
|
use crate::opcode::OpCode;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
/// An entry in the Constant Pool.
|
/// An entry in the Constant Pool.
|
||||||
///
|
///
|
||||||
@ -35,6 +36,9 @@ pub enum LoadError {
|
|||||||
InvalidFunctionIndex,
|
InvalidFunctionIndex,
|
||||||
MalformedHeader,
|
MalformedHeader,
|
||||||
MalformedSection,
|
MalformedSection,
|
||||||
|
MissingSyscallSection,
|
||||||
|
DuplicateSyscallIdentity,
|
||||||
|
InvalidUtf8,
|
||||||
UnexpectedEof,
|
UnexpectedEof,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,6 +64,22 @@ pub struct Export {
|
|||||||
pub func_idx: u32,
|
pub func_idx: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
pub struct SyscallDecl {
|
||||||
|
pub module: String,
|
||||||
|
pub name: String,
|
||||||
|
pub version: u16,
|
||||||
|
pub arg_slots: u16,
|
||||||
|
pub ret_slots: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section kind tags. The serializer writes one of these next to each section
// payload, and the loader dispatches on the same values when reading a module.

/// Constant pool section.
const SECTION_KIND_CONST_POOL: u32 = 0;
/// Function table section.
const SECTION_KIND_FUNCTIONS: u32 = 1;
/// Raw code section.
const SECTION_KIND_CODE: u32 = 2;
/// Debug information section.
const SECTION_KIND_DEBUG: u32 = 3;
/// Export table section.
const SECTION_KIND_EXPORTS: u32 = 4;
/// Syscall declaration (`SYSC`) section.
const SECTION_KIND_SYSCALLS: u32 = 5;
|
||||||
|
|
||||||
/// Represents the final serialized format of a PBS v0 module.
|
/// Represents the final serialized format of a PBS v0 module.
|
||||||
///
|
///
|
||||||
/// This structure is a pure data container for the PBS format. It does NOT
|
/// This structure is a pure data container for the PBS format. It does NOT
|
||||||
@ -74,6 +94,7 @@ pub struct BytecodeModule {
|
|||||||
pub code: Vec<u8>,
|
pub code: Vec<u8>,
|
||||||
pub debug_info: Option<DebugInfo>,
|
pub debug_info: Option<DebugInfo>,
|
||||||
pub exports: Vec<Export>,
|
pub exports: Vec<Export>,
|
||||||
|
pub syscalls: Vec<SyscallDecl>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BytecodeModule {
|
impl BytecodeModule {
|
||||||
@ -84,23 +105,25 @@ impl BytecodeModule {
|
|||||||
let debug_data =
|
let debug_data =
|
||||||
self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default();
|
self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default();
|
||||||
let export_data = self.serialize_exports();
|
let export_data = self.serialize_exports();
|
||||||
|
let syscall_data = self.serialize_syscalls();
|
||||||
|
|
||||||
let mut final_sections = Vec::new();
|
let mut final_sections = Vec::new();
|
||||||
if !cp_data.is_empty() {
|
if !cp_data.is_empty() {
|
||||||
final_sections.push((0, cp_data));
|
final_sections.push((SECTION_KIND_CONST_POOL, cp_data));
|
||||||
}
|
}
|
||||||
if !func_data.is_empty() {
|
if !func_data.is_empty() {
|
||||||
final_sections.push((1, func_data));
|
final_sections.push((SECTION_KIND_FUNCTIONS, func_data));
|
||||||
}
|
}
|
||||||
if !code_data.is_empty() {
|
if !code_data.is_empty() {
|
||||||
final_sections.push((2, code_data));
|
final_sections.push((SECTION_KIND_CODE, code_data));
|
||||||
}
|
}
|
||||||
if !debug_data.is_empty() {
|
if !debug_data.is_empty() {
|
||||||
final_sections.push((3, debug_data));
|
final_sections.push((SECTION_KIND_DEBUG, debug_data));
|
||||||
}
|
}
|
||||||
if !export_data.is_empty() {
|
if !export_data.is_empty() {
|
||||||
final_sections.push((4, export_data));
|
final_sections.push((SECTION_KIND_EXPORTS, export_data));
|
||||||
}
|
}
|
||||||
|
final_sections.push((SECTION_KIND_SYSCALLS, syscall_data));
|
||||||
|
|
||||||
let mut out = Vec::new();
|
let mut out = Vec::new();
|
||||||
// Magic "PBS\0"
|
// Magic "PBS\0"
|
||||||
@ -219,6 +242,26 @@ impl BytecodeModule {
|
|||||||
}
|
}
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn serialize_syscalls(&self) -> Vec<u8> {
|
||||||
|
let mut data = Vec::new();
|
||||||
|
data.extend_from_slice(&(self.syscalls.len() as u32).to_le_bytes());
|
||||||
|
for syscall in &self.syscalls {
|
||||||
|
let module = syscall.module.as_bytes();
|
||||||
|
let name = syscall.name.as_bytes();
|
||||||
|
assert!(u16::try_from(module.len()).is_ok(), "SYSC module name exceeds u16 length");
|
||||||
|
assert!(u16::try_from(name.len()).is_ok(), "SYSC syscall name exceeds u16 length");
|
||||||
|
|
||||||
|
data.extend_from_slice(&(module.len() as u16).to_le_bytes());
|
||||||
|
data.extend_from_slice(module);
|
||||||
|
data.extend_from_slice(&(name.len() as u16).to_le_bytes());
|
||||||
|
data.extend_from_slice(name);
|
||||||
|
data.extend_from_slice(&syscall.version.to_le_bytes());
|
||||||
|
data.extend_from_slice(&syscall.arg_slots.to_le_bytes());
|
||||||
|
data.extend_from_slice(&syscall.ret_slots.to_le_bytes());
|
||||||
|
}
|
||||||
|
data
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BytecodeLoader;
|
pub struct BytecodeLoader;
|
||||||
@ -296,35 +339,45 @@ impl BytecodeLoader {
|
|||||||
code: Vec::new(),
|
code: Vec::new(),
|
||||||
debug_info: None,
|
debug_info: None,
|
||||||
exports: Vec::new(),
|
exports: Vec::new(),
|
||||||
|
syscalls: Vec::new(),
|
||||||
};
|
};
|
||||||
|
let mut has_syscalls = false;
|
||||||
|
|
||||||
for (kind, offset, length) in sections {
|
for (kind, offset, length) in sections {
|
||||||
let section_data = &bytes[offset as usize..(offset + length) as usize];
|
let section_data = &bytes[offset as usize..(offset + length) as usize];
|
||||||
match kind {
|
match kind {
|
||||||
0 => {
|
SECTION_KIND_CONST_POOL => {
|
||||||
// Const Pool
|
// Const Pool
|
||||||
module.const_pool = parse_const_pool(section_data)?;
|
module.const_pool = parse_const_pool(section_data)?;
|
||||||
}
|
}
|
||||||
1 => {
|
SECTION_KIND_FUNCTIONS => {
|
||||||
// Functions
|
// Functions
|
||||||
module.functions = parse_functions(section_data)?;
|
module.functions = parse_functions(section_data)?;
|
||||||
}
|
}
|
||||||
2 => {
|
SECTION_KIND_CODE => {
|
||||||
// Code
|
// Code
|
||||||
module.code = section_data.to_vec();
|
module.code = section_data.to_vec();
|
||||||
}
|
}
|
||||||
3 => {
|
SECTION_KIND_DEBUG => {
|
||||||
// Debug Info
|
// Debug Info
|
||||||
module.debug_info = Some(parse_debug_section(section_data)?);
|
module.debug_info = Some(parse_debug_section(section_data)?);
|
||||||
}
|
}
|
||||||
4 => {
|
SECTION_KIND_EXPORTS => {
|
||||||
// Exports
|
// Exports
|
||||||
module.exports = parse_exports(section_data)?;
|
module.exports = parse_exports(section_data)?;
|
||||||
}
|
}
|
||||||
|
SECTION_KIND_SYSCALLS => {
|
||||||
|
module.syscalls = parse_syscalls(section_data)?;
|
||||||
|
has_syscalls = true;
|
||||||
|
}
|
||||||
_ => {} // Skip unknown or optional sections
|
_ => {} // Skip unknown or optional sections
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !has_syscalls {
|
||||||
|
return Err(LoadError::MissingSyscallSection);
|
||||||
|
}
|
||||||
|
|
||||||
// Additional validations
|
// Additional validations
|
||||||
validate_module(&module)?;
|
validate_module(&module)?;
|
||||||
|
|
||||||
@ -520,7 +573,74 @@ fn parse_exports(data: &[u8]) -> Result<Vec<Export>, LoadError> {
|
|||||||
Ok(exports)
|
Ok(exports)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_syscalls(data: &[u8]) -> Result<Vec<SyscallDecl>, LoadError> {
|
||||||
|
if data.len() < 4 {
|
||||||
|
return Err(LoadError::MalformedSection);
|
||||||
|
}
|
||||||
|
|
||||||
|
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
||||||
|
let mut syscalls = Vec::with_capacity(count);
|
||||||
|
let mut pos = 4;
|
||||||
|
|
||||||
|
for _ in 0..count {
|
||||||
|
if pos + 2 > data.len() {
|
||||||
|
return Err(LoadError::UnexpectedEof);
|
||||||
|
}
|
||||||
|
let module_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
|
||||||
|
pos += 2;
|
||||||
|
if pos + module_len > data.len() {
|
||||||
|
return Err(LoadError::UnexpectedEof);
|
||||||
|
}
|
||||||
|
let module =
|
||||||
|
std::str::from_utf8(&data[pos..pos + module_len]).map_err(|_| LoadError::InvalidUtf8)?;
|
||||||
|
pos += module_len;
|
||||||
|
|
||||||
|
if pos + 2 > data.len() {
|
||||||
|
return Err(LoadError::UnexpectedEof);
|
||||||
|
}
|
||||||
|
let name_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
|
||||||
|
pos += 2;
|
||||||
|
if pos + name_len > data.len() {
|
||||||
|
return Err(LoadError::UnexpectedEof);
|
||||||
|
}
|
||||||
|
let name =
|
||||||
|
std::str::from_utf8(&data[pos..pos + name_len]).map_err(|_| LoadError::InvalidUtf8)?;
|
||||||
|
pos += name_len;
|
||||||
|
|
||||||
|
if pos + 6 > data.len() {
|
||||||
|
return Err(LoadError::UnexpectedEof);
|
||||||
|
}
|
||||||
|
let version = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap());
|
||||||
|
let arg_slots = u16::from_le_bytes(data[pos + 2..pos + 4].try_into().unwrap());
|
||||||
|
let ret_slots = u16::from_le_bytes(data[pos + 4..pos + 6].try_into().unwrap());
|
||||||
|
pos += 6;
|
||||||
|
|
||||||
|
syscalls.push(SyscallDecl {
|
||||||
|
module: module.to_owned(),
|
||||||
|
name: name.to_owned(),
|
||||||
|
version,
|
||||||
|
arg_slots,
|
||||||
|
ret_slots,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if pos != data.len() {
|
||||||
|
return Err(LoadError::MalformedSection);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(syscalls)
|
||||||
|
}
|
||||||
|
|
||||||
fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> {
|
fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> {
|
||||||
|
let mut syscall_identities = HashSet::with_capacity(module.syscalls.len());
|
||||||
|
for syscall in &module.syscalls {
|
||||||
|
if !syscall_identities
|
||||||
|
.insert((syscall.module.clone(), syscall.name.clone(), syscall.version))
|
||||||
|
{
|
||||||
|
return Err(LoadError::DuplicateSyscallIdentity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for func in &module.functions {
|
for func in &module.functions {
|
||||||
// Opcode stream bounds
|
// Opcode stream bounds
|
||||||
if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() {
|
if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() {
|
||||||
@ -591,6 +711,36 @@ mod tests {
|
|||||||
h
|
h
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn minimal_module() -> BytecodeModule {
|
||||||
|
BytecodeModule {
|
||||||
|
version: 0,
|
||||||
|
const_pool: vec![],
|
||||||
|
functions: vec![],
|
||||||
|
code: vec![],
|
||||||
|
debug_info: None,
|
||||||
|
exports: vec![],
|
||||||
|
syscalls: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_pbs_with_sections(sections: Vec<(u32, Vec<u8>)>) -> Vec<u8> {
|
||||||
|
let mut data = create_header(sections.len() as u32);
|
||||||
|
let mut offset = 32 + (sections.len() as u32 * 12);
|
||||||
|
|
||||||
|
for (kind, section_data) in §ions {
|
||||||
|
data.extend_from_slice(&kind.to_le_bytes());
|
||||||
|
data.extend_from_slice(&offset.to_le_bytes());
|
||||||
|
data.extend_from_slice(&(section_data.len() as u32).to_le_bytes());
|
||||||
|
offset += section_data.len() as u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (_, section_data) in sections {
|
||||||
|
data.extend_from_slice(§ion_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
data
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_invalid_magic() {
|
fn test_invalid_magic() {
|
||||||
let mut data = create_header(0);
|
let mut data = create_header(0);
|
||||||
@ -645,10 +795,10 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_invalid_function_code_offset() {
|
fn test_invalid_function_code_offset() {
|
||||||
let mut data = create_header(2);
|
let mut data = create_header(3);
|
||||||
// Section 1: Functions, Kind 1, Offset 64, Length 20 (Header 4 + 1 entry 16)
|
// Section 1: Functions, Kind 1, Offset 80, Length 20 (Header 4 + 1 entry 16)
|
||||||
data.extend_from_slice(&1u32.to_le_bytes());
|
data.extend_from_slice(&1u32.to_le_bytes());
|
||||||
data.extend_from_slice(&64u32.to_le_bytes());
|
data.extend_from_slice(&80u32.to_le_bytes());
|
||||||
data.extend_from_slice(&20u32.to_le_bytes());
|
data.extend_from_slice(&20u32.to_le_bytes());
|
||||||
|
|
||||||
// Section 2: Code, Kind 2, Offset 128, Length 10
|
// Section 2: Code, Kind 2, Offset 128, Length 10
|
||||||
@ -656,25 +806,31 @@ mod tests {
|
|||||||
data.extend_from_slice(&128u32.to_le_bytes());
|
data.extend_from_slice(&128u32.to_le_bytes());
|
||||||
data.extend_from_slice(&10u32.to_le_bytes());
|
data.extend_from_slice(&10u32.to_le_bytes());
|
||||||
|
|
||||||
|
// Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
|
||||||
|
data.extend_from_slice(&5u32.to_le_bytes());
|
||||||
|
data.extend_from_slice(&160u32.to_le_bytes());
|
||||||
|
data.extend_from_slice(&4u32.to_le_bytes());
|
||||||
|
|
||||||
data.resize(256, 0);
|
data.resize(256, 0);
|
||||||
|
|
||||||
// Setup functions section
|
// Setup functions section
|
||||||
let func_data_start = 64;
|
let func_data_start = 80;
|
||||||
data[func_data_start..func_data_start + 4].copy_from_slice(&1u32.to_le_bytes()); // 1 function
|
data[func_data_start..func_data_start + 4].copy_from_slice(&1u32.to_le_bytes()); // 1 function
|
||||||
let entry_start = func_data_start + 4;
|
let entry_start = func_data_start + 4;
|
||||||
data[entry_start..entry_start + 4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5
|
data[entry_start..entry_start + 4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5
|
||||||
data[entry_start + 4..entry_start + 8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10
|
data[entry_start + 4..entry_start + 8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10
|
||||||
// 5 + 10 = 15 > 10 (code section length)
|
// 5 + 10 = 15 > 10 (code section length)
|
||||||
|
data[160..164].copy_from_slice(&0u32.to_le_bytes());
|
||||||
|
|
||||||
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex));
|
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_invalid_const_index() {
|
fn test_invalid_const_index() {
|
||||||
let mut data = create_header(2);
|
let mut data = create_header(3);
|
||||||
// Section 1: Const Pool, Kind 0, Offset 64, Length 4 (Empty CP)
|
// Section 1: Const Pool, Kind 0, Offset 80, Length 4 (Empty CP)
|
||||||
data.extend_from_slice(&0u32.to_le_bytes());
|
data.extend_from_slice(&0u32.to_le_bytes());
|
||||||
data.extend_from_slice(&64u32.to_le_bytes());
|
data.extend_from_slice(&80u32.to_le_bytes());
|
||||||
data.extend_from_slice(&4u32.to_le_bytes());
|
data.extend_from_slice(&4u32.to_le_bytes());
|
||||||
|
|
||||||
// Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0)
|
// Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0)
|
||||||
@ -682,25 +838,105 @@ mod tests {
|
|||||||
data.extend_from_slice(&128u32.to_le_bytes());
|
data.extend_from_slice(&128u32.to_le_bytes());
|
||||||
data.extend_from_slice(&6u32.to_le_bytes());
|
data.extend_from_slice(&6u32.to_le_bytes());
|
||||||
|
|
||||||
|
// Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
|
||||||
|
data.extend_from_slice(&5u32.to_le_bytes());
|
||||||
|
data.extend_from_slice(&160u32.to_le_bytes());
|
||||||
|
data.extend_from_slice(&4u32.to_le_bytes());
|
||||||
|
|
||||||
data.resize(256, 0);
|
data.resize(256, 0);
|
||||||
|
|
||||||
// Setup empty CP
|
// Setup empty CP
|
||||||
data[64..68].copy_from_slice(&0u32.to_le_bytes());
|
data[80..84].copy_from_slice(&0u32.to_le_bytes());
|
||||||
|
|
||||||
// Setup code with PushConst 0
|
// Setup code with PushConst 0
|
||||||
data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes());
|
data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes());
|
||||||
data[130..134].copy_from_slice(&0u32.to_le_bytes());
|
data[130..134].copy_from_slice(&0u32.to_le_bytes());
|
||||||
|
data[160..164].copy_from_slice(&0u32.to_le_bytes());
|
||||||
|
|
||||||
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex));
|
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_valid_minimal_load() {
|
fn test_missing_sysc_section_is_rejected() {
|
||||||
let data = create_header(0);
|
let data = create_header(0);
|
||||||
|
|
||||||
|
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MissingSyscallSection));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_valid_minimal_load_with_empty_sysc() {
|
||||||
|
let data = minimal_module().serialize();
|
||||||
let module = BytecodeLoader::load(&data).unwrap();
|
let module = BytecodeLoader::load(&data).unwrap();
|
||||||
assert_eq!(module.version, 0);
|
assert_eq!(module.version, 0);
|
||||||
assert!(module.const_pool.is_empty());
|
assert!(module.const_pool.is_empty());
|
||||||
assert!(module.functions.is_empty());
|
assert!(module.functions.is_empty());
|
||||||
assert!(module.code.is_empty());
|
assert!(module.code.is_empty());
|
||||||
|
assert!(module.syscalls.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A module carrying one syscall declaration must come back unchanged after
/// being serialized and loaded again.
#[test]
fn test_valid_sysc_roundtrip() {
    let expected = vec![SyscallDecl {
        module: "gfx".into(),
        name: "draw_line".into(),
        version: 1,
        arg_slots: 4,
        ret_slots: 0,
    }];

    let mut module = minimal_module();
    module.syscalls = expected.clone();

    let bytes = module.serialize();
    let loaded = BytecodeLoader::load(&bytes).unwrap();

    assert_eq!(loaded.syscalls, expected);
}
|
||||||
|
|
||||||
|
/// A `SYSC` payload too short to hold the four-byte count must be rejected.
#[test]
fn test_malformed_sysc_section_is_rejected() {
    // Three bytes only: one short of the `u32` declaration count.
    let truncated_payload = vec![1, 0, 0];
    let image = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, truncated_payload)]);

    let result = BytecodeLoader::load(&image);

    assert_eq!(result, Err(LoadError::MalformedSection));
}
|
||||||
|
|
||||||
|
/// A declaration whose module string is not valid UTF-8 must be rejected.
#[test]
fn test_invalid_utf8_in_sysc_section_is_rejected() {
    let payload: Vec<u8> = [
        &1u32.to_le_bytes()[..], // declaration count
        &1u16.to_le_bytes()[..], // module length
        &[0xFF][..],             // module bytes: a lone 0xFF is never valid UTF-8
        &1u16.to_le_bytes()[..], // name length
        &[b'x'][..],             // name bytes
        &1u16.to_le_bytes()[..], // version
        &0u16.to_le_bytes()[..], // arg_slots
        &0u16.to_le_bytes()[..], // ret_slots
    ]
    .concat();

    let image = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, payload)]);

    assert_eq!(BytecodeLoader::load(&image), Err(LoadError::InvalidUtf8));
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_duplicate_sysc_identity_is_rejected() {
|
||||||
|
let mut module = minimal_module();
|
||||||
|
module.syscalls = vec![
|
||||||
|
SyscallDecl {
|
||||||
|
module: "system".into(),
|
||||||
|
name: "has_cart".into(),
|
||||||
|
version: 1,
|
||||||
|
arg_slots: 0,
|
||||||
|
ret_slots: 1,
|
||||||
|
},
|
||||||
|
SyscallDecl {
|
||||||
|
module: "system".into(),
|
||||||
|
name: "has_cart".into(),
|
||||||
|
version: 1,
|
||||||
|
arg_slots: 0,
|
||||||
|
ret_slots: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let data = module.serialize();
|
||||||
|
|
||||||
|
assert_eq!(BytecodeLoader::load(&data), Err(LoadError::DuplicateSyscallIdentity));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -118,6 +118,7 @@ impl From<ProgramImage> for BytecodeModule {
|
|||||||
code: program.rom.as_ref().to_vec(),
|
code: program.rom.as_ref().to_vec(),
|
||||||
debug_info: program.debug_info.clone(),
|
debug_info: program.debug_info.clone(),
|
||||||
exports,
|
exports,
|
||||||
|
syscalls: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -38,6 +38,7 @@ pub fn generate() -> Result<()> {
|
|||||||
function_names: vec![(0, "main".into())],
|
function_names: vec![(0, "main".into())],
|
||||||
}),
|
}),
|
||||||
exports: vec![Export { symbol: "main".into(), func_idx: 0 }],
|
exports: vec![Export { symbol: "main".into(), func_idx: 0 }],
|
||||||
|
syscalls: vec![],
|
||||||
};
|
};
|
||||||
|
|
||||||
let bytes = module.serialize();
|
let bytes = module.serialize();
|
||||||
|
|||||||
@ -268,6 +268,124 @@ Required behavior:
|
|||||||
|
|
||||||
- patch before `Verifier::verify(...)`
|
- patch before `Verifier::verify(...)`
|
||||||
|
|
||||||
|
## Implementation Plan
|
||||||
|
|
||||||
|
Implementation should be staged so each phase leaves the workspace in a coherent state and keeps the verifier/VM contract intact.
|
||||||
|
|
||||||
|
### Commit Checklist
|
||||||
|
|
||||||
|
1. `prometeu-bytecode: add SyscallDecl model and mandatory SYSC section`
|
||||||
|
2. `prometeu-bytecode: add SYSC parser validation and load errors`
|
||||||
|
3. `prometeu-bytecode: add phase-1 coverage for empty/valid/invalid SYSC`
|
||||||
|
4. `prometeu-bytecode: wire downstream constructors to new BytecodeModule.syscalls field`
|
||||||
|
5. `prometeu-bytecode: clean up naming/docs after phase-1 passes`
|
||||||
|
|
||||||
|
### Phase 1 - Extend PBX module format
|
||||||
|
|
||||||
|
Target crates:
|
||||||
|
|
||||||
|
- `crates/console/prometeu-bytecode`
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. add `SyscallDecl` to `src/model.rs` and extend `BytecodeModule` with `syscalls: Vec<SyscallDecl>`
|
||||||
|
2. reserve a new section kind for `SYSC` in module serialization/deserialization
|
||||||
|
3. enforce the mandatory-section rule: valid PBS images always carry `SYSC`, including `count = 0`
|
||||||
|
4. reject malformed payloads, invalid UTF-8, and duplicate canonical identities during load
|
||||||
|
5. update `src/lib.rs` exports if needed so the VM and HAL can consume the new declarations
|
||||||
|
|
||||||
|
Checkpoint:
|
||||||
|
|
||||||
|
- `BytecodeLoader::load(...)` returns `BytecodeModule` with canonical syscall declarations preserved from PBX
|
||||||
|
|
||||||
|
### Phase 2 - Add pre-load opcode support
|
||||||
|
|
||||||
|
Target crates:
|
||||||
|
|
||||||
|
- `crates/console/prometeu-bytecode`
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. add `HOSTCALL` to `src/opcode.rs` with a `u32` immediate
|
||||||
|
2. extend `src/opcode_spec.rs` and `src/decoder.rs` so the loader can scan and decode `HOSTCALL`
|
||||||
|
3. update `src/assembler.rs` and `src/disassembler.rs` so tests and fixtures can produce/read pre-load artifacts
|
||||||
|
4. keep the runtime contract explicit: `HOSTCALL` is representable in bytecode artifacts but must not survive loader patching
|
||||||
|
|
||||||
|
Checkpoint:
|
||||||
|
|
||||||
|
- bytecode tooling round-trips `HOSTCALL <index>` correctly, while runtime execution still depends on patched `SYSCALL <id>`
|
||||||
|
|
||||||
|
### Phase 3 - Bridge PBX declarations to host metadata
|
||||||
|
|
||||||
|
Target crates:
|
||||||
|
|
||||||
|
- `crates/console/prometeu-hal`
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. add a resolver path that accepts program-owned syscall declarations instead of only `&'static str` identities
|
||||||
|
2. resolve each `(module, name, version)` against `src/syscalls.rs`
|
||||||
|
3. validate `arg_slots` and `ret_slots` against authoritative `SyscallMeta`
|
||||||
|
4. validate required capabilities against cartridge-derived `CapFlags`
|
||||||
|
5. return deterministic, load-facing errors for unknown syscalls, ABI mismatches, and missing capabilities
|
||||||
|
|
||||||
|
Design note:
|
||||||
|
|
||||||
|
- this phase likely needs a small owned-string adapter or a new helper alongside `resolve_program_syscalls(...)`, because PBX strings are runtime data, not `&'static str`
|
||||||
|
|
||||||
|
Checkpoint:
|
||||||
|
|
||||||
|
- given only `BytecodeModule.syscalls` plus granted `CapFlags`, the loader can produce a resolved table `sysc_index -> syscall_id`
|
||||||
|
|
||||||
|
### Phase 4 - Patch before verification
|
||||||
|
|
||||||
|
Target crates:
|
||||||
|
|
||||||
|
- `crates/console/prometeu-vm`
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. add a load-time patching helper near `src/virtual_machine.rs` or a small dedicated module
|
||||||
|
2. run that helper immediately after `BytecodeLoader::load(...)` and before `Verifier::verify(...)`
|
||||||
|
3. scan `module.code`, decode every instruction, and rewrite `HOSTCALL <index>` into `SYSCALL <resolved_id>`
|
||||||
|
4. reject out-of-bounds `HOSTCALL` indices during the scan
|
||||||
|
5. track `SYSC` usage and reject declarations that are never referenced
|
||||||
|
6. assert that no `HOSTCALL` remains before handing code to the verifier
|
||||||
|
7. only then call `Verifier::verify(...)`, compute `max_stack_slots`, and build `ProgramImage::from(module)`
|
||||||
|
|
||||||
|
Why the sequencing matters:
|
||||||
|
|
||||||
|
- the current load path in `crates/console/prometeu-vm/src/virtual_machine.rs` verifies the raw module immediately after `BytecodeLoader::load(...)`
|
||||||
|
- `ProgramImage` currently stores only the final ROM/functions/constants and does not preserve a syscall declaration table, so patching must happen while the code is still a `BytecodeModule`
|
||||||
|
|
||||||
|
Checkpoint:
|
||||||
|
|
||||||
|
- the verifier sees only numeric `SYSCALL <id>` instructions, preserving the existing verifier and VM execution model
|
||||||
|
|
||||||
|
### Phase 5 - Tests and failure matrix
|
||||||
|
|
||||||
|
Target crates:
|
||||||
|
|
||||||
|
- `crates/console/prometeu-bytecode`
|
||||||
|
- `crates/console/prometeu-hal`
|
||||||
|
- `crates/console/prometeu-vm`
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. add serialization/deserialization tests for missing, empty, valid, malformed, and duplicate `SYSC`
|
||||||
|
2. add opcode tests for `HOSTCALL` decoding and assembler/disassembler coverage
|
||||||
|
3. add resolver tests for unknown identity, ABI mismatch, and capability mismatch
|
||||||
|
4. add VM load-path tests proving patch-before-verify behavior
|
||||||
|
5. add an assertion that final executable images contain only numeric `SYSCALL`
|
||||||
|
|
||||||
|
Suggested PR slicing:
|
||||||
|
|
||||||
|
1. bytecode format + tests
|
||||||
|
2. `HOSTCALL` opcode plumbing
|
||||||
|
3. HAL resolution/ABI validation
|
||||||
|
4. VM loader patching
|
||||||
|
5. integration and regression tests
|
||||||
|
|
||||||
## Deterministic Load Errors
|
## Deterministic Load Errors
|
||||||
|
|
||||||
Load must fail for at least:
|
Load must fail for at least:
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user