test(p10-1d): integration smoke tests for C + C++

Verifies end-to-end ingest + search + Citation::Code shape:
- tier1_c_ingest_searchable: .c file → --code-lang c search → symbol
  = function name (no nesting), lang = "c", chunker_version = "code-c-ast-v1".
- tier1_cpp_ingest_searchable: .cpp file → --code-lang cpp search →
  symbol starts with namespace::Class prefix, lang = "cpp",
  chunker_version = "code-cpp-ast-v1".

Brings code_ingest_smoke to 18 tests (Tier 1: 9 → 11, Tier 2: 3,
Tier 3: 4).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-21 14:31:35 +00:00
parent 1034de25a2
commit 192835e5bf

View File

@@ -1117,6 +1117,175 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
);
}
/// p10-1d Task G — end-to-end smoke test for C sources.
///
/// Writes a `parser.c` containing one top-level function into the test
/// workspace, ingests it, and then runs a lexical search for
/// `parse_record` restricted to `code_lang = ["c"]`. The test checks:
///
/// * the ingest report item for `parser.c` is stamped with
///   `parser_version = "code-c-v1"` and `chunker_version = "code-c-ast-v1"`;
/// * the first `Citation::Code` hit carries `lang = "c"`,
///   `symbol = "parse_record"` (bare function name — C has no nesting) and
///   a `line_start` of at least 1;
/// * the hit itself reports `code_lang = "c"` and the same chunker version.
#[test]
fn tier1_c_ingest_searchable() {
    let env = TestEnv::lexical_only();

    // Seed the workspace with a single-function C translation unit.
    let source_path = env.workspace_root.join("parser.c");
    let source_text = "#include <stdio.h>\n\nint parse_record(const char *line) {\n if (line == NULL) return -1;\n return 0;\n}\n";
    std::fs::write(source_path, source_text).unwrap();

    // Ingest and make sure the file was picked up without errors.
    let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
        .expect("ingest must succeed");
    assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
    assert!(report.new >= 1, "c file ingested: {report:?}");

    // The per-file report entry must name the C parser and chunker.
    let items = report.items.as_ref().expect("items present");
    let c_item = items
        .iter()
        .find(|item| item.doc_path.0.ends_with("parser.c"))
        .expect("parser.c item present");
    let parser_version = c_item.parser_version.as_ref().map(|p| p.0.as_str());
    assert_eq!(
        parser_version,
        Some("code-c-v1"),
        "parser_version must be code-c-v1"
    );
    let chunker_version = c_item.chunker_version.as_ref().map(|c| c.0.as_str());
    assert_eq!(
        chunker_version,
        Some("code-c-ast-v1"),
        "chunker_version must be code-c-ast-v1"
    );

    // Lexical search for the function name, filtered to C code only.
    let filters = kebab_core::SearchFilters {
        code_lang: vec!["c".to_string()],
        ..Default::default()
    };
    let query = kebab_core::SearchQuery {
        text: "parse_record".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 10,
        filters,
    };
    let hits = kebab_app::search_with_config(env.config.clone(), query)
        .expect("search must succeed");

    // Take the first hit whose citation is a code citation.
    let hit = hits
        .iter()
        .find(|candidate| matches!(&candidate.citation, Citation::Code { .. }))
        .expect("at least one Citation::Code hit for 'parse_record'");

    // The `find` above already guarantees the variant, so the `else` arm
    // can never run.
    let Citation::Code {
        lang,
        symbol,
        line_start,
        ..
    } = &hit.citation
    else {
        unreachable!()
    };
    assert_eq!(lang.as_deref(), Some("c"), "citation.lang must be 'c'");
    assert_eq!(
        symbol.as_deref(),
        Some("parse_record"),
        "C symbol must be function name only (no nesting)"
    );
    assert!(*line_start >= 1, "line_start must be >=1");

    // The hit-level metadata must agree with the citation.
    assert_eq!(
        hit.code_lang.as_deref(),
        Some("c"),
        "SearchHit.code_lang must be 'c'"
    );
    assert_eq!(
        hit.chunker_version.0.as_str(),
        "code-c-ast-v1",
        "C chunks must be stamped with code-c-ast-v1"
    );
}
/// p10-1d Task G — end-to-end smoke test for C++ sources.
///
/// Writes a `chunker.cpp` containing `namespace kebab { namespace chunk {
/// class Foo { void bar(); } } }` into the test workspace, ingests it, and
/// then runs a lexical search for `bar` restricted to `code_lang = ["cpp"]`.
/// The test checks:
///
/// * the ingest report item for `chunker.cpp` is stamped with
///   `parser_version = "code-cpp-v1"` and
///   `chunker_version = "code-cpp-ast-v1"`;
/// * the first `Citation::Code` hit carries `lang = "cpp"`, a `symbol`
///   beginning with `"kebab::chunk::Foo"`, and a `line_start` of at least 1;
/// * the hit itself reports `code_lang = "cpp"` and the same chunker version.
#[test]
fn tier1_cpp_ingest_searchable() {
    let env = TestEnv::lexical_only();

    // Seed the workspace with a nested-namespace class definition.
    let source_path = env.workspace_root.join("chunker.cpp");
    let source_text = "namespace kebab {\nnamespace chunk {\nclass Foo {\npublic:\n void bar() { /* impl */ }\n};\n}\n}\n";
    std::fs::write(source_path, source_text).unwrap();

    // Ingest and make sure the file was picked up without errors.
    let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
        .expect("ingest must succeed");
    assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
    assert!(report.new >= 1, "cpp file ingested: {report:?}");

    // The per-file report entry must name the C++ parser and chunker.
    let items = report.items.as_ref().expect("items present");
    let cpp_item = items
        .iter()
        .find(|item| item.doc_path.0.ends_with("chunker.cpp"))
        .expect("chunker.cpp item present");
    let parser_version = cpp_item.parser_version.as_ref().map(|p| p.0.as_str());
    assert_eq!(
        parser_version,
        Some("code-cpp-v1"),
        "parser_version must be code-cpp-v1"
    );
    let chunker_version = cpp_item.chunker_version.as_ref().map(|c| c.0.as_str());
    assert_eq!(
        chunker_version,
        Some("code-cpp-ast-v1"),
        "chunker_version must be code-cpp-ast-v1"
    );

    // Lexical search for the method name, filtered to C++ code only.
    let filters = kebab_core::SearchFilters {
        code_lang: vec!["cpp".to_string()],
        ..Default::default()
    };
    let query = kebab_core::SearchQuery {
        text: "bar".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 10,
        filters,
    };
    let hits = kebab_app::search_with_config(env.config.clone(), query)
        .expect("search must succeed");

    // Take the first hit whose citation is a code citation.
    let hit = hits
        .iter()
        .find(|candidate| matches!(&candidate.citation, Citation::Code { .. }))
        .expect("at least one Citation::Code hit for 'bar'");

    // The `find` above already guarantees the variant, so the `else` arm
    // can never run.
    let Citation::Code {
        lang,
        symbol,
        line_start,
        ..
    } = &hit.citation
    else {
        unreachable!()
    };
    assert_eq!(lang.as_deref(), Some("cpp"), "citation.lang must be 'cpp'");
    // Either the class chunk ("kebab::chunk::Foo") or the method chunk
    // ("kebab::chunk::Foo::bar") may rank first, so only the shared prefix
    // is checked.
    let has_class_prefix = symbol
        .as_deref()
        .is_some_and(|s| s.starts_with("kebab::chunk::Foo"));
    assert!(
        has_class_prefix,
        "C++ symbol must start with namespace::Class prefix, got {:?}", symbol
    );
    assert!(*line_start >= 1, "line_start must be >=1");

    // The hit-level metadata must agree with the citation.
    assert_eq!(
        hit.code_lang.as_deref(),
        Some("cpp"),
        "SearchHit.code_lang must be 'cpp'"
    );
    assert_eq!(
        hit.chunker_version.0.as_str(),
        "code-cpp-ast-v1",
        "C++ chunks must be stamped with code-cpp-ast-v1"
    );
}
/// p10-3 fix regression: a shell file (direct Tier 3, not a fallback)
/// must also report Unchanged on re-ingest. Shell goes straight to
/// CodeTextParagraphV1Chunker so `stored_is_tier3_fallback` is false