test(p10-1d): integration smoke tests for C + C++
Verifies end-to-end ingest + search + Citation::Code shape: - tier1_c_ingest_searchable: .c file → --code-lang c search → symbol = function name (no nesting), lang = "c", chunker_version = "code-c-ast-v1". - tier1_cpp_ingest_searchable: .cpp file → --code-lang cpp search → symbol starts with namespace::Class prefix, lang = "cpp", chunker_version = "code-cpp-ast-v1". Brings code_ingest_smoke to 18 tests (Tier 1: 9 → 11, Tier 2: 3, Tier 3: 4). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1117,6 +1117,175 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
|
||||
);
|
||||
}
|
||||
|
||||
/// p10-1d Task G: a `.c` file with a single top-level function is ingested
|
||||
/// and the resulting `Citation::Code` hit must carry `lang="c"`,
|
||||
/// `symbol="parse_record"` (function name only — no nesting in C), and
|
||||
/// `chunker_version = "code-c-ast-v1"`.
|
||||
#[test]
|
||||
fn tier1_c_ingest_searchable() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
std::fs::write(
|
||||
env.workspace_root.join("parser.c"),
|
||||
"#include <stdio.h>\n\nint parse_record(const char *line) {\n if (line == NULL) return -1;\n return 0;\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
|
||||
assert!(report.new >= 1, "c file ingested: {report:?}");
|
||||
|
||||
let c_item = report
|
||||
.items
|
||||
.as_ref()
|
||||
.expect("items present")
|
||||
.iter()
|
||||
.find(|i| i.doc_path.0.ends_with("parser.c"))
|
||||
.expect("parser.c item present");
|
||||
assert_eq!(
|
||||
c_item.parser_version.as_ref().map(|p| p.0.as_str()),
|
||||
Some("code-c-v1"),
|
||||
"parser_version must be code-c-v1"
|
||||
);
|
||||
assert_eq!(
|
||||
c_item.chunker_version.as_ref().map(|c| c.0.as_str()),
|
||||
Some("code-c-ast-v1"),
|
||||
"chunker_version must be code-c-ast-v1"
|
||||
);
|
||||
|
||||
let query = kebab_core::SearchQuery {
|
||||
text: "parse_record".to_string(),
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
k: 10,
|
||||
filters: kebab_core::SearchFilters {
|
||||
code_lang: vec!["c".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
.find(|h| matches!(&h.citation, Citation::Code { .. }))
|
||||
.expect("at least one Citation::Code hit for 'parse_record'");
|
||||
|
||||
match &h.citation {
|
||||
Citation::Code {
|
||||
lang,
|
||||
symbol,
|
||||
line_start,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(lang.as_deref(), Some("c"), "citation.lang must be 'c'");
|
||||
assert_eq!(
|
||||
symbol.as_deref(),
|
||||
Some("parse_record"),
|
||||
"C symbol must be function name only (no nesting)"
|
||||
);
|
||||
assert!(*line_start >= 1, "line_start must be >=1");
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
h.code_lang.as_deref(),
|
||||
Some("c"),
|
||||
"SearchHit.code_lang must be 'c'"
|
||||
);
|
||||
assert_eq!(
|
||||
h.chunker_version.0.as_str(),
|
||||
"code-c-ast-v1",
|
||||
"C chunks must be stamped with code-c-ast-v1"
|
||||
);
|
||||
}
|
||||
|
||||
/// p10-1d Task G: a `.cpp` file with nested namespace + class is ingested
|
||||
/// and the resulting `Citation::Code` hit must carry `lang="cpp"`, a
|
||||
/// `symbol` that starts with `"kebab::chunk::Foo"` (namespace::Class or
|
||||
/// namespace::Class::method), and `chunker_version = "code-cpp-ast-v1"`.
|
||||
#[test]
|
||||
fn tier1_cpp_ingest_searchable() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
std::fs::write(
|
||||
env.workspace_root.join("chunker.cpp"),
|
||||
"namespace kebab {\nnamespace chunk {\nclass Foo {\npublic:\n void bar() { /* impl */ }\n};\n}\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
|
||||
assert!(report.new >= 1, "cpp file ingested: {report:?}");
|
||||
|
||||
let cpp_item = report
|
||||
.items
|
||||
.as_ref()
|
||||
.expect("items present")
|
||||
.iter()
|
||||
.find(|i| i.doc_path.0.ends_with("chunker.cpp"))
|
||||
.expect("chunker.cpp item present");
|
||||
assert_eq!(
|
||||
cpp_item.parser_version.as_ref().map(|p| p.0.as_str()),
|
||||
Some("code-cpp-v1"),
|
||||
"parser_version must be code-cpp-v1"
|
||||
);
|
||||
assert_eq!(
|
||||
cpp_item.chunker_version.as_ref().map(|c| c.0.as_str()),
|
||||
Some("code-cpp-ast-v1"),
|
||||
"chunker_version must be code-cpp-ast-v1"
|
||||
);
|
||||
|
||||
let query = kebab_core::SearchQuery {
|
||||
text: "bar".to_string(),
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
k: 10,
|
||||
filters: kebab_core::SearchFilters {
|
||||
code_lang: vec!["cpp".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
.find(|h| matches!(&h.citation, Citation::Code { .. }))
|
||||
.expect("at least one Citation::Code hit for 'bar'");
|
||||
|
||||
match &h.citation {
|
||||
Citation::Code {
|
||||
lang,
|
||||
symbol,
|
||||
line_start,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(lang.as_deref(), Some("cpp"), "citation.lang must be 'cpp'");
|
||||
// Symbol could be "kebab::chunk::Foo" (class) or "kebab::chunk::Foo::bar"
|
||||
// (method) depending on which chunk ranks first.
|
||||
assert!(
|
||||
symbol.as_deref().is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
|
||||
"C++ symbol must start with namespace::Class prefix, got {:?}", symbol
|
||||
);
|
||||
assert!(*line_start >= 1, "line_start must be >=1");
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
h.code_lang.as_deref(),
|
||||
Some("cpp"),
|
||||
"SearchHit.code_lang must be 'cpp'"
|
||||
);
|
||||
assert_eq!(
|
||||
h.chunker_version.0.as_str(),
|
||||
"code-cpp-ast-v1",
|
||||
"C++ chunks must be stamped with code-cpp-ast-v1"
|
||||
);
|
||||
}
|
||||
|
||||
/// p10-3 fix regression: a shell file (direct Tier 3, not a fallback)
|
||||
/// must also report Unchanged on re-ingest. Shell goes straight to
|
||||
/// CodeTextParagraphV1Chunker so `stored_is_tier3_fallback` is false
|
||||
|
||||
Reference in New Issue
Block a user