use std::collections::BTreeSet; use std::fs; use std::path::{Path, PathBuf}; // Denylist per User's canonical rules const FORBIDDEN_IDENT_TOKENS: &[&str] = &[ // HIP must be fully removed from code "hip", "HIP", // Legacy RC API surface (exact token only) "release", "Release", // Legacy scope helpers (exact tokens only) "enter_scope", "exit_scope", "scope_frame", "scope_stack", "rc_scope", "hip_scope", "borrow_scope", "mutate_scope", "peek_scope", // Legacy handle/gate/retain-release naming that is almost certainly RC/HIP-related "GateHandle", "gate_handle", "HandlePool", "handle_pool", "retain_release", "retainHandle", "releaseHandle", ]; const FORBIDDEN_PATH_SEGMENTS: &[&str] = &[ "hip", "refcount", "ref_count", "retain_release", "gate_pool", "handle_pool", "scope_stack", "borrow_scope", "mutate_scope", "peek_scope", ]; #[test] fn test_no_legacy_artifacts() { let workspace_root = find_workspace_root().expect("Failed to locate workspace root with [workspace] Cargo.toml"); // Collect Rust files under crates/**/src/**/*.rs and any build.rs under crates/** let files = collect_rust_files(&workspace_root); let mut violations = BTreeSet::new(); // Path-segment checks (case-insensitive) on each file path for path in &files { if let Some(diag) = path_segment_violation(path) { violations.insert(diag); } } // Content checks for path in &files { let Ok(content) = fs::read_to_string(path) else { continue }; let stripped = strip_comments_and_strings(&content); let toks = tokenize_identifiers(&stripped); for (tok, line, col) in toks { if is_forbidden_ident(&tok) { violations.insert(format!("{}:{}:{} identifier '{}': legacy token", rel(&workspace_root, path), line, col, tok)); } } } if !violations.is_empty() { let mut msg = String::from("Legacy artifacts detected (RC/HIP/scope helpers). Please remove or rename.\n"); for v in &violations { msg.push_str(" - "); msg.push_str(v); msg.push('\n'); } panic!("{}", msg); } } fn rel(root: &Path, p: &Path) -> String { pathdiff::diff_paths(p, root).unwrap_or_else(|| p.to_path_buf()).display().to_string() } fn find_workspace_root() -> Option { let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); // Walk up until we find a Cargo.toml that contains "[workspace]" while dir.pop() { let candidate = dir.join("Cargo.toml"); if candidate.exists() { if let Ok(txt) = fs::read_to_string(&candidate) { if txt.contains("[workspace]") { return Some(dir.clone()); } } } } None } fn collect_rust_files(root: &Path) -> Vec { let mut out = Vec::new(); let crates_dir = root.join("crates"); if !crates_dir.exists() { return out; } let mut stack = vec![crates_dir]; while let Some(dir) = stack.pop() { // Exclude noisy/non-code directories early let name_lc = dir.file_name().and_then(|s| s.to_str()).unwrap_or("").to_ascii_lowercase(); if matches!(name_lc.as_str(), "target" | "docs" | "files" | "sdcard" | "test-cartridges" | "temp") || name_lc.starts_with("dist") { continue; } let Ok(read) = fs::read_dir(&dir) else { continue }; for entry in read.flatten() { let path = entry.path(); if path.is_dir() { stack.push(path); } else if path.file_name().and_then(|s| s.to_str()) == Some("build.rs") { out.push(path); } else if path.extension().and_then(|s| s.to_str()) == Some("rs") { // Only include files under a src/ directory if path.components().any(|c| c.as_os_str() == "src") { out.push(path); } } } } out.sort(); out } fn path_segment_violation(path: &Path) -> Option { let mut segs: Vec = path .components() .filter_map(|c| c.as_os_str().to_str().map(|s| s.to_string())) .collect(); if let Some(fname) = path.file_stem().and_then(|s| s.to_str()) { segs.push(fname.to_string()); } for seg in segs { let seg_lc = seg.to_ascii_lowercase(); for &bad in FORBIDDEN_PATH_SEGMENTS { if seg_lc == bad.to_ascii_lowercase() { return Some(format!( "{} path-segment '{}'", path.display(), seg )); } } } None } fn is_forbidden_ident(tok: &str) -> bool { FORBIDDEN_IDENT_TOKENS.iter().any(|&bad| bad == tok) } fn tokenize_identifiers(text: &str) -> Vec<(String, usize, usize)> { let mut out = Vec::new(); let mut line: usize = 1; let mut col: usize = 1; let mut i = 0; let bytes = text.as_bytes(); while i < bytes.len() { let b = bytes[i]; let ch = b as char; if ch == '\n' { line += 1; col = 1; i += 1; continue; } if is_ident_start(ch) { let start_line = line; let start_col = col; let start = i; i += 1; col += 1; while i < bytes.len() { let c = bytes[i] as char; if is_ident_part(c) { i += 1; col += 1; } else { break; } } let tok = &text[start..i]; out.push((tok.to_string(), start_line, start_col)); } else { i += 1; col += 1; } } out } fn is_ident_start(c: char) -> bool { c == '_' || c.is_ascii_alphabetic() } fn is_ident_part(c: char) -> bool { c == '_' || c.is_ascii_alphanumeric() } fn strip_comments_and_strings(src: &str) -> String { // State machine that removes line comments (//...), block comments (/*...*/), // normal strings ("..." with escapes), and raw strings (r#"..."# with up to 10 hashes). let mut out = String::with_capacity(src.len()); let b = src.as_bytes(); let mut i = 0; let mut line = 1usize; // keep newlines for accurate positions while i < b.len() { let c = b[i] as char; // Preserve newlines to maintain line numbers if c == '\n' { out.push('\n'); i += 1; line += 1; continue; } // Try to match line comment if c == '/' && i + 1 < b.len() && b[i + 1] as char == '/' { i += 2; while i < b.len() { let ch = b[i] as char; if ch == '\n' { break; } i += 1; } continue; // newline is handled at top on next loop } // Try to match block comment (non-nested for simplicity) if c == '/' && i + 1 < b.len() && b[i + 1] as char == '*' { i += 2; while i + 1 < b.len() { let ch = b[i] as char; if ch == '\n' { out.push('\n'); line += 1; } if ch == '*' && b[i + 1] as char == '/' { i += 2; break; } i += 1; } continue; } // Try to match raw string like r"..." or r#"..."# up to 10 hashes if c == 'r' { let mut j = i + 1; let mut hashes = 0usize; if j < b.len() && b[j] as char == '#' { while j < b.len() && b[j] as char == '#' && hashes < 10 { hashes += 1; j += 1; } } if j < b.len() && b[j] as char == '"' { // Found start of raw string j += 1; // skip opening quote let mut end_found = false; while j < b.len() { let ch = b[j] as char; if ch == '\n' { out.push('\n'); line += 1; j += 1; continue; } if ch == '"' { // check for closing hashes let mut k = j + 1; let mut matched = 0usize; while matched < hashes && k < b.len() && b[k] as char == '#' { matched += 1; k += 1; } if matched == hashes { i = k; // consume entire raw string end_found = true; break; } else { j += 1; // this quote is part of content continue; } } j += 1; } if !end_found { i = j; } // EOF inside string continue; } } // Try to match normal string "..." with escapes if c == '"' { i += 1; // skip starting quote while i < b.len() { let ch = b[i] as char; if ch == '\n' { out.push('\n'); line += 1; } if ch == '\\' { i += 2; // skip escaped char continue; } if ch == '"' { i += 1; break; } i += 1; } continue; } // Default: copy byte out.push(c); i += 1; } out } // Minimal pathdiff to avoid bringing an external dependency if not present mod pathdiff { use std::path::{Component, Path, PathBuf}; pub fn diff_paths(path: &Path, base: &Path) -> Option { let path = path.absolutize(); let base = base.absolutize(); let mut ita = base.components(); let mut itb = path.components(); // pop common prefix let mut comps_a: Vec = Vec::new(); let mut comps_b: Vec = Vec::new(); for c in ita { comps_a.push(c); } for c in itb { comps_b.push(c); } let mut i = 0usize; while i < comps_a.len() && i < comps_b.len() && comps_a[i] == comps_b[i] { i += 1; } let mut result = PathBuf::new(); for _ in i..comps_a.len() { result.push(".."); } for c in &comps_b[i..] { result.push(c.as_os_str()); } Some(result) } trait Absolutize { fn absolutize(&self) -> PathBuf; } impl Absolutize for Path { fn absolutize(&self) -> PathBuf { if self.is_absolute() { self.to_path_buf() } else { std::env::current_dir().unwrap().join(self) } } } }