2026-03-24 13:40:43 +00:00

130 lines
5.5 KiB
Rust

//! Canonical heap object header and kind tags.
//!
//! This module defines the minimal common header that prefixes every
//! heap-allocated object managed by the VM. The purpose of the header is to:
//! - allow the garbage collector (GC) to identify and classify objects,
//! - carry the object "kind" (type tag),
//! - optionally carry size/length metadata for variable-sized payloads.
//!
//! Scope of this file:
//! - No GC/traversal logic is implemented here.
//! - No allocation strategies are defined here.
//! - Only the data layout and documentation are provided.
//!
//! Layout and semantics
//! --------------------
//! The header has a fixed layout and uses `repr(C)` to keep a stable field order.
//!
//! Fields:
//! - `flags` (u8): bit flags used by the runtime/GC. Bit 0 is the GC "mark" bit.
//! Remaining bits are reserved for future use (e.g., color, pinning, etc.).
//! - `kind` (ObjectKind): object kind tag (stored as `u8`). It describes how the
//! object should be interpreted by higher layers (array, string, closure, ...).
//! - `payload_len` (u32): optional, object-specific length field. For fixed-size
//! objects this MAY be zero. For variable-size objects it typically stores the
//! element count (arrays) or byte length (strings). Exact interpretation is
//! defined by each object kind; the GC treats it as an opaque metadata field.
//!
//! Closure-specific note: for `ObjectKind::Closure`, `payload_len` is the
//! fixed size `8` and the payload layout is exactly two little-endian `u32`s:
//! `[fn_id][env_len]`. The captured environment values themselves are NOT in
//! the raw payload; they live in a separate GC-visible area managed by the
//! heap (see `Heap`), and the GC must traverse exactly `env_len` values from
//! that environment slice.
//!
//! Notes:
//! - The GC only relies on `flags` (mark bit) and `kind` to traverse/trace.
//! Actual traversal logic will be implemented elsewhere in future PRs.
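//! - The header is intentionally compact to minimize per-object overhead:
//! 8 bytes on most targets (1-byte `flags` + 1-byte `kind` + 2 bytes of
//! `repr(C)` alignment padding + 4-byte `payload_len`).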
//!
//! Safety & invariants:
//! - Every heap object MUST begin with an `ObjectHeader`.
//! - `kind` must contain a valid `ObjectKind` tag for the object's payload.
//! - `payload_len` must be consistent with the chosen `kind` (if applicable).
/// Type tag identifying what a heap object is.
///
/// The enum is `repr(u8)` and its discriminants are part of the stable
/// representation shared with FFI consumers and persisted images: never
/// renumber or reorder existing variants, and only add new ones after the
/// last entry.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
#[repr(u8)]
pub enum ObjectKind {
    /// Reserved "unknown" tag. A valid allocation should never carry it.
    #[allow(dead_code)] // Not constructed in this crate yet; kept so the tag layout stays stable for persisted images
    Unknown = 0,

    /// UTF-8 string; `payload_len` holds the byte count.
    #[allow(dead_code)] // Stable public tag; construction may happen in higher layers
    String = 1,

    /// Homogeneous array of VM values/handles; `payload_len` holds the element count.
    #[allow(dead_code)] // Stable public tag; built through Heap helpers in tests
    Array = 2,

    /// Compiled closure/function value.
    ///
    /// `payload_len` and payload layout invariants:
    /// - `payload_len` is always `8`.
    /// - The payload is two little-endian `u32`s: `[fn_id][env_len]`.
    /// - The `env_len` captured values are kept out-of-line in the heap so
    ///   the GC can see them directly while traversing.
    Closure = 3,

    /// Byte buffer / blob; `payload_len` holds the byte count.
    #[allow(dead_code)] // Stable public tag kept for future/host APIs
    Bytes = 4,

    /// User-defined/native host object whose payload shape is host-defined.
    #[allow(dead_code)] // Reserved for host/native integrations
    UserData = 5,

    /// Coroutine: a suspended execution context with its own stack/frames.
    ///
    /// - The stack/frames live in typed fields of the heap storage rather
    ///   than in raw `payload` bytes, so the GC can walk the `HeapRef`s they
    ///   contain directly.
    /// - `payload_len` is `0` because the object has a fixed layout.
    Coroutine = 6,
    // Append future kinds below this line so existing tag numbers never change.
}
/// Named bit masks for the `flags` byte of `ObjectHeader`.
pub mod object_flags {
    /// GC mark bit consulted during tracing: set means marked, clear means not marked.
    pub const MARKED: u8 = 1 << 0;

    // Bits 1 through 7 are unassigned and reserved for future flags, for
    // example a "pinned" bit at `1 << 1` that would prevent movement/collection.
}
/// Fixed-layout header placed in front of every heap-allocated object.
///
/// `repr(C)` keeps the fields in declaration order: `flags`, `kind`,
/// `payload_len`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct ObjectHeader {
    /// Runtime/GC flag bits; the individual masks are named in `object_flags`.
    pub flags: u8,
    /// Kind tag of the object, stored compactly as a `u8` (see `ObjectKind`).
    pub kind: ObjectKind,
    /// Optional length metadata for variable-sized payloads.
    /// Fixed-size objects may leave this at zero.
    pub payload_len: u32,
}
impl ObjectHeader {
    /// Builds a header for an object of the given `kind` with the given
    /// `payload_len`. All flag bits start cleared.
    pub const fn new(kind: ObjectKind, payload_len: u32) -> Self {
        Self {
            flags: 0,
            kind,
            payload_len,
        }
    }

    /// Reports whether the GC mark bit is currently set in `flags`.
    pub fn is_marked(&self) -> bool {
        let mark_bit = self.flags & object_flags::MARKED;
        mark_bit != 0
    }

    /// Sets the GC mark bit when `value` is `true` and clears it otherwise.
    /// All other flag bits are left untouched. This only flips the bit; the
    /// GC logic itself lives elsewhere.
    pub fn set_marked(&mut self, value: bool) {
        // Start from the flags with the mark bit cleared, then re-add it if requested.
        let without_mark = self.flags & !object_flags::MARKED;
        self.flags = if value {
            without_mark | object_flags::MARKED
        } else {
            without_mark
        };
    }
}