diff --git a/Cargo.lock b/Cargo.lock index 2900913f..87129c7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1663,6 +1663,10 @@ dependencies = [ "prometeu-vm", ] +[[package]] +name = "prometeu-quality-checks" +version = "0.1.0" + [[package]] name = "prometeu-system" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index e3116a38..b3c850e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "crates/tools/prometeu-cli", "crates/dev/prometeu-test-support", "crates/dev/prometeu-layer-tests", + "crates/dev/prometeu-quality-checks", ] resolver = "2" diff --git a/crates/dev/prometeu-quality-checks/Cargo.toml b/crates/dev/prometeu-quality-checks/Cargo.toml new file mode 100644 index 00000000..bc0bf5b0 --- /dev/null +++ b/crates/dev/prometeu-quality-checks/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "prometeu-quality-checks" +version = "0.1.0" +edition = "2021" +publish = false + +[dev-dependencies] diff --git a/crates/dev/prometeu-quality-checks/src/lib.rs b/crates/dev/prometeu-quality-checks/src/lib.rs new file mode 100644 index 00000000..680a3dad --- /dev/null +++ b/crates/dev/prometeu-quality-checks/src/lib.rs @@ -0,0 +1 @@ +// Intentionally empty: this crate only hosts integration tests under `tests/`. 
diff --git a/crates/dev/prometeu-quality-checks/tests/no_legacy.rs b/crates/dev/prometeu-quality-checks/tests/no_legacy.rs new file mode 100644 index 00000000..1f8f0d8b --- /dev/null +++ b/crates/dev/prometeu-quality-checks/tests/no_legacy.rs @@ -0,0 +1,341 @@ +use std::collections::BTreeSet; +use std::fs; +use std::path::{Path, PathBuf}; + +// Denylist per User's canonical rules +const FORBIDDEN_IDENT_TOKENS: &[&str] = &[ + // HIP must be fully removed from code + "hip", + "HIP", + + // Legacy RC API surface (exact token only) + "release", + "Release", + + // Legacy scope helpers (exact tokens only) + "enter_scope", + "exit_scope", + "scope_frame", + "scope_stack", + "rc_scope", + "hip_scope", + "borrow_scope", + "mutate_scope", + "peek_scope", + + // Legacy handle/gate/retain-release naming that is almost certainly RC/HIP-related + "GateHandle", + "gate_handle", + "HandlePool", + "handle_pool", + "retain_release", + "retainHandle", + "releaseHandle", +]; + +const FORBIDDEN_PATH_SEGMENTS: &[&str] = &[ + "hip", + "refcount", + "ref_count", + "retain_release", + "gate_pool", + "handle_pool", + "scope_stack", + "borrow_scope", + "mutate_scope", + "peek_scope", +]; + +#[test] +fn test_no_legacy_artifacts() { + let workspace_root = find_workspace_root().expect("Failed to locate workspace root with [workspace] Cargo.toml"); + + // Collect Rust files under crates/**/src/**/*.rs and any build.rs under crates/** + let files = collect_rust_files(&workspace_root); + + let mut violations = BTreeSet::new(); + + // Path-segment checks (case-insensitive) on each file path + for path in &files { + if let Some(diag) = path_segment_violation(path) { + violations.insert(diag); + } + } + + // Content checks + for path in &files { + let Ok(content) = fs::read_to_string(path) else { continue }; + let stripped = strip_comments_and_strings(&content); + let toks = tokenize_identifiers(&stripped); + + for (tok, line, col) in toks { + if is_forbidden_ident(&tok) { + 
violations.insert(format!("{}:{}:{} identifier '{}': legacy token", + rel(&workspace_root, path), line, col, tok)); + } + } + } + + if !violations.is_empty() { + let mut msg = String::from("Legacy artifacts detected (RC/HIP/scope helpers). Please remove or rename.\n"); + for v in &violations { + msg.push_str(" - "); + msg.push_str(v); + msg.push('\n'); + } + panic!("{}", msg); + } +} + +fn rel(root: &Path, p: &Path) -> String { + pathdiff::diff_paths(p, root).unwrap_or_else(|| p.to_path_buf()).display().to_string() +} + +fn find_workspace_root() -> Option<PathBuf> { + let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + // Walk up until we find a Cargo.toml that contains "[workspace]" + while dir.pop() { + let candidate = dir.join("Cargo.toml"); + if candidate.exists() { + if let Ok(txt) = fs::read_to_string(&candidate) { + if txt.contains("[workspace]") { + return Some(dir.clone()); + } + } + } + } + None +} + +fn collect_rust_files(root: &Path) -> Vec<PathBuf> { + let mut out = Vec::new(); + let crates_dir = root.join("crates"); + if !crates_dir.exists() { return out; } + + let mut stack = vec![crates_dir]; + while let Some(dir) = stack.pop() { + // Exclude noisy/non-code directories early + let name_lc = dir.file_name().and_then(|s| s.to_str()).unwrap_or("").to_ascii_lowercase(); + if matches!(name_lc.as_str(), + "target" | "docs" | "files" | "sdcard" | "test-cartridges" | "temp") + || name_lc.starts_with("dist") + { + continue; + } + + let Ok(read) = fs::read_dir(&dir) else { continue }; + for entry in read.flatten() { + let path = entry.path(); + if path.is_dir() { + stack.push(path); + } else if path.file_name().and_then(|s| s.to_str()) == Some("build.rs") { + out.push(path); + } else if path.extension().and_then(|s| s.to_str()) == Some("rs") { + // Only include files under a src/ directory + if path.components().any(|c| c.as_os_str() == "src") { + out.push(path); + } + } + } + } + out.sort(); + out +} + +fn path_segment_violation(path: &Path) -> Option<String> { + let mut segs: 
Vec<String> = path + .components() + .filter_map(|c| c.as_os_str().to_str().map(|s| s.to_string())) + .collect(); + if let Some(fname) = path.file_stem().and_then(|s| s.to_str()) { + segs.push(fname.to_string()); + } + for seg in segs { + let seg_lc = seg.to_ascii_lowercase(); + for &bad in FORBIDDEN_PATH_SEGMENTS { + if seg_lc == bad.to_ascii_lowercase() { + return Some(format!( + "{} path-segment '{}'", + path.display(), seg + )); + } + } + } + None +} + +fn is_forbidden_ident(tok: &str) -> bool { + FORBIDDEN_IDENT_TOKENS.iter().any(|&bad| bad == tok) +} + +fn tokenize_identifiers(text: &str) -> Vec<(String, usize, usize)> { + let mut out = Vec::new(); + let mut line: usize = 1; + let mut col: usize = 1; + let mut i = 0; + let bytes = text.as_bytes(); + while i < bytes.len() { + let b = bytes[i]; + let ch = b as char; + if ch == '\n' { + line += 1; + col = 1; + i += 1; + continue; + } + + if is_ident_start(ch) { + let start_line = line; + let start_col = col; + let start = i; + i += 1; col += 1; + while i < bytes.len() { + let c = bytes[i] as char; + if is_ident_part(c) { i += 1; col += 1; } else { break; } + } + let tok = &text[start..i]; + out.push((tok.to_string(), start_line, start_col)); + } else { + i += 1; col += 1; + } + } + out +} + +fn is_ident_start(c: char) -> bool { + c == '_' || c.is_ascii_alphabetic() +} + +fn is_ident_part(c: char) -> bool { + c == '_' || c.is_ascii_alphanumeric() +} + +fn strip_comments_and_strings(src: &str) -> String { + // State machine that removes line comments (//...), block comments (/*...*/), + // normal strings ("..." with escapes), and raw strings (r#"..."# with up to 10 hashes). 
+ let mut out = String::with_capacity(src.len()); + let b = src.as_bytes(); + let mut i = 0; + let mut line = 1usize; // keep newlines for accurate positions + + while i < b.len() { + let c = b[i] as char; + // Preserve newlines to maintain line numbers + if c == '\n' { out.push('\n'); i += 1; line += 1; continue; } + + // Try to match line comment + if c == '/' && i + 1 < b.len() && b[i + 1] as char == '/' { + i += 2; + while i < b.len() { + let ch = b[i] as char; + if ch == '\n' { break; } + i += 1; + } + continue; // newline is handled at top on next loop + } + + // Try to match block comment (non-nested for simplicity) + if c == '/' && i + 1 < b.len() && b[i + 1] as char == '*' { + i += 2; + while i + 1 < b.len() { + let ch = b[i] as char; + if ch == '\n' { out.push('\n'); line += 1; } + if ch == '*' && b[i + 1] as char == '/' { i += 2; break; } + i += 1; + } + continue; + } + + // Try to match raw string like r"..." or r#"..."# up to 10 hashes + if c == 'r' { + let mut j = i + 1; + let mut hashes = 0usize; + if j < b.len() && b[j] as char == '#' { + while j < b.len() && b[j] as char == '#' && hashes < 10 { hashes += 1; j += 1; } + } + if j < b.len() && b[j] as char == '"' { + // Found start of raw string + j += 1; // skip opening quote + let mut end_found = false; + while j < b.len() { + let ch = b[j] as char; + if ch == '\n' { out.push('\n'); line += 1; j += 1; continue; } + if ch == '"' { + // check for closing hashes + let mut k = j + 1; + let mut matched = 0usize; + while matched < hashes && k < b.len() && b[k] as char == '#' { + matched += 1; k += 1; + } + if matched == hashes { + i = k; // consume entire raw string + end_found = true; + break; + } else { + j += 1; // this quote is part of content + continue; + } + } + j += 1; + } + if !end_found { i = j; } // EOF inside string + continue; + } + } + + // Try to match normal string "..." 
with escapes + if c == '"' { + i += 1; // skip starting quote + while i < b.len() { + let ch = b[i] as char; + if ch == '\n' { out.push('\n'); line += 1; } + if ch == '\\' { + i += 2; // skip escaped char + continue; + } + if ch == '"' { i += 1; break; } + i += 1; + } + continue; + } + + // Default: copy byte + out.push(c); + i += 1; + } + + out +} + +// Minimal pathdiff to avoid bringing an external dependency if not present +mod pathdiff { + use std::path::{Component, Path, PathBuf}; + + pub fn diff_paths(path: &Path, base: &Path) -> Option<PathBuf> { + let path = path.absolutize(); + let base = base.absolutize(); + let mut ita = base.components(); + let mut itb = path.components(); + + // pop common prefix + let mut comps_a: Vec<Component<'_>> = Vec::new(); + let mut comps_b: Vec<Component<'_>> = Vec::new(); + for c in ita { comps_a.push(c); } + for c in itb { comps_b.push(c); } + + let mut i = 0usize; + while i < comps_a.len() && i < comps_b.len() && comps_a[i] == comps_b[i] { i += 1; } + + let mut result = PathBuf::new(); + for _ in i..comps_a.len() { result.push(".."); } + for c in &comps_b[i..] { result.push(c.as_os_str()); } + Some(result) + } + + trait Absolutize { fn absolutize(&self) -> PathBuf; } + impl Absolutize for Path { + fn absolutize(&self) -> PathBuf { + if self.is_absolute() { self.to_path_buf() } else { std::env::current_dir().unwrap().join(self) } + } + } +} diff --git a/files/TODOs.md b/files/TODOs.md index deb3df5f..97ef1d55 100644 --- a/files/TODOs.md +++ b/files/TODOs.md @@ -1,68 +1,3 @@ -# PR-8.4 — No Legacy Artifacts Enforcement - -## Briefing - -The codebase must not contain legacy RC/HIP artifacts. - -Forbidden patterns: - -* retain -* release -* hip -* gate -* borrow -* scope (legacy usage context) -* RC-related modules - -## Target - -Implement automated check: - -* Script or test that scans source tree for forbidden symbols. -* Fails CI if found. - -Also: - -* Remove dead modules/files. -* Remove unused features. 
- -## Acceptance Checklist - -* [ ] Automated legacy scan exists. -* [ ] No legacy symbols present. -* [ ] Dead modules removed. - -## Tests - -1. Intentionally insert forbidden symbol in test branch → test fails. -2. Scan passes on clean tree. - -## Junie Instructions - -You MAY: - -* Add static scan test. -* Remove dead code. - -You MUST NOT: - -* Suppress warnings instead of fixing code. -* Leave deprecated modules commented out. - -If unsure whether a symbol is legacy or valid, STOP and ask. - ---- - -## Final Definition of Done - -* Disassembler roundtrip-safe. -* Deterministic harness in place. -* Layered test suite established. -* No legacy artifacts remain. -* All tests reproducible and stable. - ---- - # PR-9 — Final Hardening & Baseline Documentation This phase finalizes the new Prometeu VM baseline after the architectural reset (GC, closures, coroutines, unified syscall ABI, deterministic scheduler).