use kazsearch_core::lexicon::Lexicon;
use kazsearch_core::{stem, StemConfig};
use std::fs;

/// Builds the stemmer configuration shared by the lexicon-backed parity
/// tests: derivation enabled, at most 8 reduction steps, and the Kazakh
/// stem dictionary loaded from the repo's data directory.
fn load_test_config() -> StemConfig {
    let lexicon_path = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../data/tsearch_data/kaz_stems.dict"
    );
    let lexicon = Lexicon::load(lexicon_path).expect("failed to load kaz_stems.dict");
    StemConfig {
        derivation: true,
        max_steps: 8,
        lexicon: Some(lexicon),
        ..Default::default()
    }
}

/// Parses one fixture line of the form `word|{c_stem}` into
/// `(word, c_stem)`. Returns `None` for blank lines and lines without a
/// `|` separator, which all parity tests silently skip.
fn parse_fixture_line(line: &str) -> Option<(&str, &str)> {
    let line = line.trim();
    if line.is_empty() {
        return None;
    }
    // Only the first '|' splits word from stem (same as splitn(2, '|')).
    let (word, raw_stem) = line.split_once('|')?;
    Some((word, raw_stem.trim_start_matches('{').trim_end_matches('}')))
}

/// Stems every fixture word with `cfg` and compares against the recorded
/// C stem. Returns `(total, matches, mismatches)`, where each mismatch is
/// `(word, c_stem, rust_stem)`.
fn run_parity(data: &str, cfg: &StemConfig) -> (usize, usize, Vec<(String, String, String)>) {
    let mut total = 0;
    let mut matches = 0;
    let mut mismatches: Vec<(String, String, String)> = Vec::new();
    for line in data.lines() {
        let (word, c_stem) = match parse_fixture_line(line) {
            Some(parts) => parts,
            None => continue,
        };
        let rust_stem = stem(word, cfg);
        total += 1;
        if rust_stem == c_stem {
            matches += 1;
        } else {
            mismatches.push((word.to_string(), c_stem.to_string(), rust_stem));
        }
    }
    (total, matches, mismatches)
}

/// Match percentage; returns 0.0 for an empty fixture so callers never
/// assert against NaN (previously only one of the three tests guarded this).
fn parity_pct(matches: usize, total: usize) -> f64 {
    if total > 0 {
        (matches as f64 / total as f64) * 100.0
    } else {
        0.0
    }
}

/// Prints up to `limit` mismatches as a fixed-width table. `word_width`
/// controls the WORD column (30 for the baseline report, 35 for the 5K
/// report); the rule length matches the original reports (word_width + 50).
fn print_mismatches(mismatches: &[(String, String, String)], limit: usize, word_width: usize) {
    if mismatches.is_empty() {
        return;
    }
    eprintln!("\nFirst {} mismatches:", limit);
    eprintln!(
        "{:<w$} {:<25} {:<25}",
        "WORD",
        "C_STEM",
        "RUST_STEM",
        w = word_width
    );
    eprintln!("{}", "-".repeat(word_width + 50));
    for (word, c_s, r_s) in mismatches.iter().take(limit) {
        eprintln!("{:<w$} {:<25} {:<25}", word, c_s, r_s, w = word_width);
    }
}

/// Rust stemmer must match the recorded C extension output on the baseline
/// fixture for at least 70% of words (lexicon enabled).
#[test]
fn test_parity_with_c_extension() {
    let cfg = load_test_config();
    let data = fs::read_to_string(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/c_stem_output.txt"
    ))
    .expect("c_stem_output.txt not found");

    let (total, matches, mismatches) = run_parity(&data, &cfg);
    let pct = parity_pct(matches, total);

    eprintln!("\n=== Parity Report (with lexicon) ===");
    eprintln!("Lexicon entries: {}", cfg.lexicon.as_ref().unwrap().len());
    eprintln!("Total words: {}", total);
    eprintln!("Matches: {}", matches);
    eprintln!("Mismatches: {}", mismatches.len());
    eprintln!("Parity: {:.2}%", pct);
    print_mismatches(&mismatches, 40, 30);

    assert!(
        pct >= 70.0,
        "Parity too low: {:.2}% ({}/{} words match)",
        pct,
        matches,
        total
    );
}

/// Larger 5K-word fixture must reach at least 95% parity with the C
/// extension output (lexicon enabled).
#[test]
fn test_parity_5k_words_with_lexicon() {
    let cfg = load_test_config();
    let data = fs::read_to_string(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/c_stem_output_5k.txt"
    ))
    .expect("c_stem_output_5k.txt not found");

    let (total, matches, mismatches) = run_parity(&data, &cfg);
    let pct = parity_pct(matches, total);

    eprintln!("\n=== 5K Parity Report (with lexicon) ===");
    eprintln!("Total words: {}", total);
    eprintln!("Matches: {}", matches);
    eprintln!("Mismatches: {}", mismatches.len());
    eprintln!("Parity: {:.2}%", pct);
    print_mismatches(&mismatches, 30, 35);

    assert!(
        pct >= 95.0,
        "5K parity too low: {:.2}% ({}/{} words match)",
        pct,
        matches,
        total
    );
}

/// Informational baseline: parity without the lexicon loaded. Prints the
/// percentage only; deliberately makes no assertion so it never gates CI.
#[test]
fn test_parity_without_lexicon() {
    let cfg = StemConfig::default();
    let data = fs::read_to_string(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/c_stem_output.txt"
    ))
    .expect("c_stem_output.txt not found");

    let (total, matches, _mismatches) = run_parity(&data, &cfg);
    let pct = parity_pct(matches, total);

    eprintln!(
        "\n=== No-lexicon baseline: {:.2}% ({}/{}) ===",
        pct, matches, total
    );
}