// Snapshot metadata (viewer chrome, not source): 2026-03-24 13:40:42 +00:00,
// 342 lines, 10 KiB, Rust.

use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
// Denylist per User's canonical rules
//
// Identifier tokens that must never appear in scanned sources. Matching is
// EXACT and case-sensitive on whole identifiers (see `is_forbidden_ident` and
// `tokenize_identifiers`): e.g. "hipify" does NOT match the entry "hip".
const FORBIDDEN_IDENT_TOKENS: &[&str] = &[
// HIP must be fully removed from code
"hip",
"HIP",
// Legacy RC API surface (exact token only)
// NOTE(review): the tokenizer splits identifiers at `::`, so code using
// `std::sync::atomic::Ordering::Release` would also yield a bare `Release`
// token and be flagged — confirm this is intended.
"release",
"Release",
// Legacy scope helpers (exact tokens only)
"enter_scope",
"exit_scope",
"scope_frame",
"scope_stack",
"rc_scope",
"hip_scope",
"borrow_scope",
"mutate_scope",
"peek_scope",
// Legacy handle/gate/retain-release naming that is almost certainly RC/HIP-related
"GateHandle",
"gate_handle",
"HandlePool",
"handle_pool",
"retain_release",
"retainHandle",
"releaseHandle",
];
// Path segments (directory names, file names, or file stems) that must not
// appear in any scanned file's path. Compared case-insensitively against each
// whole segment in `path_segment_violation` — substrings do not match.
const FORBIDDEN_PATH_SEGMENTS: &[&str] = &[
"hip",
"refcount",
"ref_count",
"retain_release",
"gate_pool",
"handle_pool",
"scope_stack",
"borrow_scope",
"mutate_scope",
"peek_scope",
];
#[test]
fn test_no_legacy_artifacts() {
// Repo-hygiene lint: fail if any scanned Rust file contains a forbidden
// identifier token or lives under a forbidden path segment.
let workspace_root = find_workspace_root().expect("Failed to locate workspace root with [workspace] Cargo.toml");
// Scan crates/**/src/**/*.rs plus any build.rs under crates/**.
let files = collect_rust_files(&workspace_root);

// Path-segment diagnostics first; BTreeSet keeps output sorted and deduped.
let mut violations: BTreeSet<String> = files
    .iter()
    .filter_map(|p| path_segment_violation(p))
    .collect();

// Content diagnostics: strip comments/strings, then flag forbidden tokens.
for path in &files {
    if let Ok(content) = fs::read_to_string(path) {
        let stripped = strip_comments_and_strings(&content);
        for (tok, line, col) in tokenize_identifiers(&stripped) {
            if is_forbidden_ident(&tok) {
                violations.insert(format!("{}:{}:{} identifier '{}': legacy token",
                    rel(&workspace_root, path), line, col, tok));
            }
        }
    }
}

if violations.is_empty() {
    return;
}
// Aggregate all findings into a single panic message so one run reports
// everything, not just the first hit.
let mut msg = String::from("Legacy artifacts detected (RC/HIP/scope helpers). Please remove or rename.\n");
for v in &violations {
    msg.push_str(" - ");
    msg.push_str(v);
    msg.push('\n');
}
panic!("{}", msg);
}
// Render `p` relative to `root` for compact diagnostics; falls back to the
// path as-is when no relative form can be computed.
fn rel(root: &Path, p: &Path) -> String {
    match pathdiff::diff_paths(p, root) {
        Some(relative) => relative.display().to_string(),
        None => p.display().to_string(),
    }
}
/// Locate the workspace root by walking upward from this crate's manifest
/// directory and returning the first directory whose `Cargo.toml` contains a
/// `[workspace]` section.
///
/// Fix: the original popped the path BEFORE checking, so it never inspected
/// `CARGO_MANIFEST_DIR` itself — a crate living directly at the workspace
/// root would fail to find it. The loop now checks the starting directory too.
fn find_workspace_root() -> Option<PathBuf> {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    loop {
        let candidate = dir.join("Cargo.toml");
        // read_to_string failing covers the "file does not exist" case too.
        if let Ok(txt) = fs::read_to_string(&candidate) {
            // NOTE: plain substring match — a commented-out "[workspace]"
            // would also match; acceptable for a test helper.
            if txt.contains("[workspace]") {
                return Some(dir);
            }
        }
        // Stop once we have reached the filesystem root.
        if !dir.pop() {
            return None;
        }
    }
}
// Depth-first walk of `<root>/crates`, gathering every `build.rs` plus any
// `*.rs` file that lives under a `src/` directory. Results are sorted for
// deterministic diagnostics.
fn collect_rust_files(root: &Path) -> Vec<PathBuf> {
    let mut out: Vec<PathBuf> = Vec::new();
    let crates_dir = root.join("crates");
    if !crates_dir.exists() {
        return out;
    }
    let mut pending = vec![crates_dir];
    while let Some(dir) = pending.pop() {
        // Exclude noisy/non-code directories early, by lowercased name.
        let dir_name = dir
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("")
            .to_ascii_lowercase();
        let excluded = matches!(
            dir_name.as_str(),
            "target" | "docs" | "files" | "sdcard" | "test-cartridges" | "temp"
        ) || dir_name.starts_with("dist");
        if excluded {
            continue;
        }
        let entries = match fs::read_dir(&dir) {
            Ok(rd) => rd,
            Err(_) => continue,
        };
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                pending.push(path);
                continue;
            }
            if path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
                out.push(path);
            } else if path.extension().and_then(|s| s.to_str()) == Some("rs")
                && path.components().any(|c| c.as_os_str() == "src")
            {
                // Only include .rs files under a src/ directory.
                out.push(path);
            }
        }
    }
    out.sort();
    out
}
// Check every component of `path` — plus the file stem, so "hip.rs" is caught
// even though its component is "hip.rs" — against the forbidden-segment
// denylist. Comparison is ASCII case-insensitive on whole segments.
fn path_segment_violation(path: &Path) -> Option<String> {
    let stem = path.file_stem().and_then(|s| s.to_str()).map(str::to_string);
    let segments = path
        .components()
        .filter_map(|c| c.as_os_str().to_str().map(str::to_string))
        .chain(stem);
    for seg in segments {
        let forbidden = FORBIDDEN_PATH_SEGMENTS
            .iter()
            .any(|bad| seg.eq_ignore_ascii_case(bad));
        if forbidden {
            return Some(format!(
                "{} path-segment '{}'",
                path.display(), seg
            ));
        }
    }
    None
}
// Exact-match lookup against the identifier denylist (no substring matching).
fn is_forbidden_ident(tok: &str) -> bool {
    FORBIDDEN_IDENT_TOKENS.contains(&tok)
}
// Scan `text` for ASCII identifiers ([A-Za-z_][A-Za-z0-9_]*), returning each
// token together with its 1-based (line, column) start position. Non-ASCII
// bytes never match the ASCII predicates, so multi-byte UTF-8 sequences are
// skipped byte by byte (columns are byte-based, as in the original).
fn tokenize_identifiers(text: &str) -> Vec<(String, usize, usize)> {
    let starts_ident = |c: char| c == '_' || c.is_ascii_alphabetic();
    let continues_ident = |c: char| c == '_' || c.is_ascii_alphanumeric();

    let bytes = text.as_bytes();
    let mut tokens = Vec::new();
    let (mut line, mut col) = (1usize, 1usize);
    let mut pos = 0;
    while pos < bytes.len() {
        let ch = bytes[pos] as char;
        if ch == '\n' {
            line += 1;
            col = 1;
            pos += 1;
        } else if starts_ident(ch) {
            // Record the token's start, then extend while the identifier lasts.
            let (tok_line, tok_col, tok_start) = (line, col, pos);
            pos += 1;
            col += 1;
            while pos < bytes.len() && continues_ident(bytes[pos] as char) {
                pos += 1;
                col += 1;
            }
            tokens.push((text[tok_start..pos].to_string(), tok_line, tok_col));
        } else {
            pos += 1;
            col += 1;
        }
    }
    tokens
}
// Identifiers may begin with an ASCII letter or an underscore.
fn is_ident_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
// Subsequent identifier characters: ASCII alphanumeric or underscore.
fn is_ident_part(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')
}
fn strip_comments_and_strings(src: &str) -> String {
// State machine that removes line comments (//...), block comments (/*...*/),
// normal strings ("..." with escapes), and raw strings (r#"..."# with up to 10 hashes).
//
// Newlines are copied through even inside stripped regions, so line numbers
// computed on the output match the input source.
//
// Known limitations (review notes, behavior left as-is):
// - Char literals are NOT handled: a literal such as '"' will be mistaken
//   for a string start and may swallow real code until the next quote.
// - Any 'r' immediately followed by '"' (or by hashes then '"') is treated
//   as a raw-string opener, even when that 'r' is the tail of a longer
//   identifier — a rare false positive.
// - Block comments do not nest here, although Rust's real ones do.
// - A backslash escape immediately before a newline skips that newline
//   without emitting it, which can shift later line numbers by one.
let mut out = String::with_capacity(src.len());
let b = src.as_bytes();
let mut i = 0;
// `line` is incremented alongside emitted newlines but its value is never
// read; it exists only as a bookkeeping aid.
let mut line = 1usize; // keep newlines for accurate positions
while i < b.len() {
let c = b[i] as char;
// Preserve newlines to maintain line numbers
if c == '\n' { out.push('\n'); i += 1; line += 1; continue; }
// Try to match line comment
if c == '/' && i + 1 < b.len() && b[i + 1] as char == '/' {
i += 2;
// Consume up to (but not including) the terminating newline.
while i < b.len() {
let ch = b[i] as char;
if ch == '\n' { break; }
i += 1;
}
continue; // newline is handled at top on next loop
}
// Try to match block comment (non-nested for simplicity)
if c == '/' && i + 1 < b.len() && b[i + 1] as char == '*' {
i += 2;
// Scan for "*/", still emitting newlines encountered inside the comment.
while i + 1 < b.len() {
let ch = b[i] as char;
if ch == '\n' { out.push('\n'); line += 1; }
if ch == '*' && b[i + 1] as char == '/' { i += 2; break; }
i += 1;
}
continue;
}
// Try to match raw string like r"..." or r#"..."# up to 10 hashes
if c == 'r' {
let mut j = i + 1;
let mut hashes = 0usize;
// Count the opening hashes (capped at 10), if any.
if j < b.len() && b[j] as char == '#' {
while j < b.len() && b[j] as char == '#' && hashes < 10 { hashes += 1; j += 1; }
}
if j < b.len() && b[j] as char == '"' {
// Found start of raw string
j += 1; // skip opening quote
let mut end_found = false;
while j < b.len() {
let ch = b[j] as char;
if ch == '\n' { out.push('\n'); line += 1; j += 1; continue; }
if ch == '"' {
// check for closing hashes
let mut k = j + 1;
let mut matched = 0usize;
while matched < hashes && k < b.len() && b[k] as char == '#' {
matched += 1; k += 1;
}
if matched == hashes {
i = k; // consume entire raw string
end_found = true;
break;
} else {
j += 1; // this quote is part of content
continue;
}
}
j += 1;
}
if !end_found { i = j; } // EOF inside string
continue;
}
}
// Try to match normal string "..." with escapes
if c == '"' {
i += 1; // skip starting quote
while i < b.len() {
let ch = b[i] as char;
if ch == '\n' { out.push('\n'); line += 1; }
if ch == '\\' {
i += 2; // skip escaped char
continue;
}
if ch == '"' { i += 1; break; }
i += 1;
}
continue;
}
// Default: copy byte
out.push(c);
i += 1;
}
out
}
// Minimal pathdiff to avoid bringing an external dependency if not present
mod pathdiff {
    use std::path::{Component, Path, PathBuf};

    /// Compute `path` relative to `base`. Both are absolutized against the
    /// current working directory first; the result is `..` for each component
    /// of `base` past the shared prefix, followed by the remainder of `path`.
    pub fn diff_paths(path: &Path, base: &Path) -> Option<PathBuf> {
        let path = path.absolutize();
        let base = base.absolutize();
        let comps_base: Vec<Component> = base.components().collect();
        let comps_path: Vec<Component> = path.components().collect();

        // Length of the common leading run of components.
        let common = comps_base
            .iter()
            .zip(comps_path.iter())
            .take_while(|(a, b)| a == b)
            .count();

        let mut result = PathBuf::new();
        for _ in common..comps_base.len() {
            result.push("..");
        }
        for c in &comps_path[common..] {
            result.push(c.as_os_str());
        }
        Some(result)
    }

    // Internal helper: make a path absolute without touching the filesystem.
    trait Absolutize {
        fn absolutize(&self) -> PathBuf;
    }

    impl Absolutize for Path {
        fn absolutize(&self) -> PathBuf {
            if self.is_absolute() {
                self.to_path_buf()
            } else {
                std::env::current_dir().unwrap().join(self)
            }
        }
    }
}