// Snapshot metadata (viewer chrome, not source): 2026-03-24 13:40:42 +00:00,
// 342 lines, 10 KiB, Rust.

use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
// Denylist per User's canonical rules
//
// Identifier tokens that must never appear in scanned sources. Matching is
// EXACT and case-sensitive on whole identifiers (see `is_forbidden_ident` and
// `tokenize_identifiers`): e.g. "hipify" does NOT match the entry "hip".
const FORBIDDEN_IDENT_TOKENS: &[&str] = &[
// HIP must be fully removed from code
"hip",
"HIP",
// Legacy RC API surface (exact token only)
// NOTE(review): the tokenizer splits identifiers at `::`, so code using
// `std::sync::atomic::Ordering::Release` would also yield a bare `Release`
// token and be flagged — confirm this is intended.
"release",
"Release",
// Legacy scope helpers (exact tokens only)
"enter_scope",
"exit_scope",
"scope_frame",
"scope_stack",
"rc_scope",
"hip_scope",
"borrow_scope",
"mutate_scope",
"peek_scope",
// Legacy handle/gate/retain-release naming that is almost certainly RC/HIP-related
"GateHandle",
"gate_handle",
"HandlePool",
"handle_pool",
"retain_release",
"retainHandle",
"releaseHandle",
];
// Path segments (directory names, file names, or file stems) that must not
// appear in any scanned file's path. Compared case-insensitively against each
// whole segment in `path_segment_violation` — substrings do not match.
const FORBIDDEN_PATH_SEGMENTS: &[&str] = &[
"hip",
"refcount",
"ref_count",
"retain_release",
"gate_pool",
"handle_pool",
"scope_stack",
"borrow_scope",
"mutate_scope",
"peek_scope",
];
#[test]
fn test_no_legacy_artifacts() {
// Repo-hygiene lint: fail if any scanned Rust file contains a forbidden
// identifier token or lives under a forbidden path segment.
let workspace_root = find_workspace_root().expect("Failed to locate workspace root with [workspace] Cargo.toml");
// Scan crates/**/src/**/*.rs plus any build.rs under crates/**.
let files = collect_rust_files(&workspace_root);

// Path-segment diagnostics first; BTreeSet keeps output sorted and deduped.
let mut violations: BTreeSet<String> = files
    .iter()
    .filter_map(|p| path_segment_violation(p))
    .collect();

// Content diagnostics: strip comments/strings, then flag forbidden tokens.
for path in &files {
    if let Ok(content) = fs::read_to_string(path) {
        let stripped = strip_comments_and_strings(&content);
        for (tok, line, col) in tokenize_identifiers(&stripped) {
            if is_forbidden_ident(&tok) {
                violations.insert(format!("{}:{}:{} identifier '{}': legacy token",
                    rel(&workspace_root, path), line, col, tok));
            }
        }
    }
}

if violations.is_empty() {
    return;
}
// Aggregate all findings into a single panic message so one run reports
// everything, not just the first hit.
let mut msg = String::from("Legacy artifacts detected (RC/HIP/scope helpers). Please remove or rename.\n");
for v in &violations {
    msg.push_str(" - ");
    msg.push_str(v);
    msg.push('\n');
}
panic!("{}", msg);
}
// Render `p` relative to `root` for compact diagnostics; falls back to the
// path as-is when no relative form can be computed.
fn rel(root: &Path, p: &Path) -> String {
    match pathdiff::diff_paths(p, root) {
        Some(relative) => relative.display().to_string(),
        None => p.display().to_string(),
    }
}
/// Locate the workspace root by walking upward from this crate's manifest
/// directory and returning the first directory whose `Cargo.toml` contains a
/// `[workspace]` section.
///
/// Fix: the original popped the path BEFORE checking, so it never inspected
/// `CARGO_MANIFEST_DIR` itself — a crate living directly at the workspace
/// root would fail to find it. The loop now checks the starting directory too.
fn find_workspace_root() -> Option<PathBuf> {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    loop {
        let candidate = dir.join("Cargo.toml");
        // read_to_string failing covers the "file does not exist" case too.
        if let Ok(txt) = fs::read_to_string(&candidate) {
            // NOTE: plain substring match — a commented-out "[workspace]"
            // would also match; acceptable for a test helper.
            if txt.contains("[workspace]") {
                return Some(dir);
            }
        }
        // Stop once we have reached the filesystem root.
        if !dir.pop() {
            return None;
        }
    }
}
// Depth-first walk of `<root>/crates`, gathering every `build.rs` plus any
// `*.rs` file that lives under a `src/` directory. Results are sorted for
// deterministic diagnostics.
fn collect_rust_files(root: &Path) -> Vec<PathBuf> {
    let mut out: Vec<PathBuf> = Vec::new();
    let crates_dir = root.join("crates");
    if !crates_dir.exists() {
        return out;
    }
    let mut pending = vec![crates_dir];
    while let Some(dir) = pending.pop() {
        // Exclude noisy/non-code directories early, by lowercased name.
        let dir_name = dir
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("")
            .to_ascii_lowercase();
        let excluded = matches!(
            dir_name.as_str(),
            "target" | "docs" | "files" | "sdcard" | "test-cartridges" | "temp"
        ) || dir_name.starts_with("dist");
        if excluded {
            continue;
        }
        let entries = match fs::read_dir(&dir) {
            Ok(rd) => rd,
            Err(_) => continue,
        };
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                pending.push(path);
                continue;
            }
            if path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
                out.push(path);
            } else if path.extension().and_then(|s| s.to_str()) == Some("rs")
                && path.components().any(|c| c.as_os_str() == "src")
            {
                // Only include .rs files under a src/ directory.
                out.push(path);
            }
        }
    }
    out.sort();
    out
}
// Check every component of `path` — plus the file stem, so "hip.rs" is caught
// even though its component is "hip.rs" — against the forbidden-segment
// denylist. Comparison is ASCII case-insensitive on whole segments.
fn path_segment_violation(path: &Path) -> Option<String> {
    let stem = path.file_stem().and_then(|s| s.to_str()).map(str::to_string);
    let segments = path
        .components()
        .filter_map(|c| c.as_os_str().to_str().map(str::to_string))
        .chain(stem);
    for seg in segments {
        let forbidden = FORBIDDEN_PATH_SEGMENTS
            .iter()
            .any(|bad| seg.eq_ignore_ascii_case(bad));
        if forbidden {
            return Some(format!(
                "{} path-segment '{}'",
                path.display(), seg
            ));
        }
    }
    None
}
// Exact-match lookup against the identifier denylist (no substring matching).
fn is_forbidden_ident(tok: &str) -> bool {
    FORBIDDEN_IDENT_TOKENS.contains(&tok)
}
// Scan `text` for ASCII identifiers ([A-Za-z_][A-Za-z0-9_]*), returning each
// token together with its 1-based (line, column) start position. Non-ASCII
// bytes never match the ASCII predicates, so multi-byte UTF-8 sequences are
// skipped byte by byte (columns are byte-based, as in the original).
fn tokenize_identifiers(text: &str) -> Vec<(String, usize, usize)> {
    let starts_ident = |c: char| c == '_' || c.is_ascii_alphabetic();
    let continues_ident = |c: char| c == '_' || c.is_ascii_alphanumeric();

    let bytes = text.as_bytes();
    let mut tokens = Vec::new();
    let (mut line, mut col) = (1usize, 1usize);
    let mut pos = 0;
    while pos < bytes.len() {
        let ch = bytes[pos] as char;
        if ch == '\n' {
            line += 1;
            col = 1;
            pos += 1;
        } else if starts_ident(ch) {
            // Record the token's start, then extend while the identifier lasts.
            let (tok_line, tok_col, tok_start) = (line, col, pos);
            pos += 1;
            col += 1;
            while pos < bytes.len() && continues_ident(bytes[pos] as char) {
                pos += 1;
                col += 1;
            }
            tokens.push((text[tok_start..pos].to_string(), tok_line, tok_col));
        } else {
            pos += 1;
            col += 1;
        }
    }
    tokens
}
// Identifiers may begin with an ASCII letter or an underscore.
fn is_ident_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
// Subsequent identifier characters: ASCII alphanumeric or underscore.
fn is_ident_part(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')
}
fn strip_comments_and_strings(src: &str) -> String {
// State machine that removes line comments (//...), block comments (/*...*/),
// normal strings ("..." with escapes), and raw strings (r#"..."# with up to 10 hashes).
//
// Newlines are copied through even inside stripped regions, so line numbers
// computed on the output match the input source.
//
// Known limitations (review notes, behavior left as-is):
// - Char literals are NOT handled: a literal such as '"' will be mistaken
//   for a string start and may swallow real code until the next quote.
// - Any 'r' immediately followed by '"' (or by hashes then '"') is treated
//   as a raw-string opener, even when that 'r' is the tail of a longer
//   identifier — a rare false positive.
// - Block comments do not nest here, although Rust's real ones do.
// - A backslash escape immediately before a newline skips that newline
//   without emitting it, which can shift later line numbers by one.
let mut out = String::with_capacity(src.len());
let b = src.as_bytes();
let mut i = 0;
// `line` is incremented alongside emitted newlines but its value is never
// read; it exists only as a bookkeeping aid.
let mut line = 1usize; // keep newlines for accurate positions
while i < b.len() {
let c = b[i] as char;
// Preserve newlines to maintain line numbers
if c == '\n' { out.push('\n'); i += 1; line += 1; continue; }
// Try to match line comment
if c == '/' && i + 1 < b.len() && b[i + 1] as char == '/' {
i += 2;
// Consume up to (but not including) the terminating newline.
while i < b.len() {
let ch = b[i] as char;
if ch == '\n' { break; }
i += 1;
}
continue; // newline is handled at top on next loop
}
// Try to match block comment (non-nested for simplicity)
if c == '/' && i + 1 < b.len() && b[i + 1] as char == '*' {
i += 2;
// Scan for "*/", still emitting newlines encountered inside the comment.
while i + 1 < b.len() {
let ch = b[i] as char;
if ch == '\n' { out.push('\n'); line += 1; }
if ch == '*' && b[i + 1] as char == '/' { i += 2; break; }
i += 1;
}
continue;
}
// Try to match raw string like r"..." or r#"..."# up to 10 hashes
if c == 'r' {
let mut j = i + 1;
let mut hashes = 0usize;
// Count the opening hashes (capped at 10), if any.
if j < b.len() && b[j] as char == '#' {
while j < b.len() && b[j] as char == '#' && hashes < 10 { hashes += 1; j += 1; }
}
if j < b.len() && b[j] as char == '"' {
// Found start of raw string
j += 1; // skip opening quote
let mut end_found = false;
while j < b.len() {
let ch = b[j] as char;
if ch == '\n' { out.push('\n'); line += 1; j += 1; continue; }
if ch == '"' {
// check for closing hashes
let mut k = j + 1;
let mut matched = 0usize;
while matched < hashes && k < b.len() && b[k] as char == '#' {
matched += 1; k += 1;
}
if matched == hashes {
i = k; // consume entire raw string
end_found = true;
break;
} else {
j += 1; // this quote is part of content
continue;
}
}
j += 1;
}
if !end_found { i = j; } // EOF inside string
continue;
}
}
// Try to match normal string "..." with escapes
if c == '"' {
i += 1; // skip starting quote
while i < b.len() {
let ch = b[i] as char;
if ch == '\n' { out.push('\n'); line += 1; }
if ch == '\\' {
i += 2; // skip escaped char
continue;
}
if ch == '"' { i += 1; break; }
i += 1;
}
continue;
}
// Default: copy byte
out.push(c);
i += 1;
}
out
}
// Minimal pathdiff to avoid bringing an external dependency if not present
mod pathdiff {
    use std::path::{Component, Path, PathBuf};

    /// Compute `path` relative to `base`. Both are absolutized against the
    /// current working directory first; the result is `..` for each component
    /// of `base` past the shared prefix, followed by the remainder of `path`.
    pub fn diff_paths(path: &Path, base: &Path) -> Option<PathBuf> {
        let path = path.absolutize();
        let base = base.absolutize();
        let comps_base: Vec<Component> = base.components().collect();
        let comps_path: Vec<Component> = path.components().collect();

        // Length of the common leading run of components.
        let common = comps_base
            .iter()
            .zip(comps_path.iter())
            .take_while(|(a, b)| a == b)
            .count();

        let mut result = PathBuf::new();
        for _ in common..comps_base.len() {
            result.push("..");
        }
        for c in &comps_path[common..] {
            result.push(c.as_os_str());
        }
        Some(result)
    }

    // Internal helper: make a path absolute without touching the filesystem.
    trait Absolutize {
        fn absolutize(&self) -> PathBuf;
    }

    impl Absolutize for Path {
        fn absolutize(&self) -> PathBuf {
            if self.is_absolute() {
                self.to_path_buf()
            } else {
                std::env::current_dir().unwrap().join(self)
            }
        }
    }
}