fix(p10-1b): PR review round 1 — 5 actionable items
(1) tasks/HOTFIXES.md: add 2026-05-20 entry for path-sanitize gap in
module_path_for_python / _tsjs (promised in task spec line 55 but
not landed in round 0). Bidirectional cross-link added.
(2) crates/kebab-parse-code: dedup filename_from_workspace_path /
strip_extension / join_symbol via new pub(crate) module scaffold.rs.
Removed 11 byte-identical fn copies across rust/python/typescript/
javascript extractors. Pure refactor — no behavior change.
(3) crates/kebab-parse-code/tests/fixtures/sample.py: @staticmethod was
semantically inappropriate on a module-level fn (it is meant for methods
defined inside a class). Changed to @no_type_check; test assertion updated.
(5)+(6) crates/kebab-parse-code/src/lang.rs: add tests/test_foo.py case
to module_path_for_python test + doc clarifying that tests/ /
examples/ / benches/ are intentionally not stripped.
(4) PUSH BACK — TS/JS class decorator handling is design intent of 1B
1차 (typescript.rs:242-244 + HOTFIXES entry 2 already in place).
No code change.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,8 @@ use kebab_core::{
|
|||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
|
use crate::scaffold::{filename_from_workspace_path, join_symbol, strip_extension};
|
||||||
|
|
||||||
pub const PARSER_VERSION: &str = "code-js-v1";
|
pub const PARSER_VERSION: &str = "code-js-v1";
|
||||||
|
|
||||||
/// JavaScript / JSX AST extractor. Per-unit blocks via
|
/// JavaScript / JSX AST extractor. Per-unit blocks via
|
||||||
@@ -177,36 +179,6 @@ impl Extractor for JavascriptAstExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filename_from_workspace_path(p: &str) -> String {
|
|
||||||
p.rsplit('/').next().unwrap_or(p).to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn strip_extension(filename: &str) -> String {
|
|
||||||
match filename.rfind('.') {
|
|
||||||
Some(0) => filename.to_string(),
|
|
||||||
Some(idx) => filename[..idx].to_string(),
|
|
||||||
None => filename.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Join (mod_prefix, mod_path, name) into a dotted JS symbol.
|
|
||||||
///
|
|
||||||
/// Note: JS uses `.` as the join separator between mod_prefix /
|
|
||||||
/// class-nesting / leaf — even though `mod_prefix` itself may contain
|
|
||||||
/// `/` (e.g. `src/search/Retriever`), the JOIN between segments stays
|
|
||||||
/// `.`. So a class method symbol looks like `src/search/Foo.search`.
|
|
||||||
fn join_symbol(mod_prefix: &str, mod_path: &[String], name: &str) -> String {
|
|
||||||
let mut parts: Vec<&str> = Vec::with_capacity(mod_path.len() + 2);
|
|
||||||
if !mod_prefix.is_empty() {
|
|
||||||
parts.push(mod_prefix);
|
|
||||||
}
|
|
||||||
for p in mod_path {
|
|
||||||
parts.push(p.as_str());
|
|
||||||
}
|
|
||||||
parts.push(name);
|
|
||||||
parts.join(".")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_blocks(
|
fn build_blocks(
|
||||||
source: &str,
|
source: &str,
|
||||||
doc_id: &kebab_core::DocumentId,
|
doc_id: &kebab_core::DocumentId,
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ pub mod lang;
|
|||||||
pub mod python;
|
pub mod python;
|
||||||
pub mod repo;
|
pub mod repo;
|
||||||
pub mod rust;
|
pub mod rust;
|
||||||
|
pub(crate) mod scaffold;
|
||||||
pub mod skip;
|
pub mod skip;
|
||||||
pub mod typescript;
|
pub mod typescript;
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ use kebab_core::{
|
|||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
|
use crate::scaffold::{filename_from_workspace_path, join_symbol, strip_extension};
|
||||||
|
|
||||||
pub const PARSER_VERSION: &str = "code-python-v1";
|
pub const PARSER_VERSION: &str = "code-python-v1";
|
||||||
|
|
||||||
/// Python AST extractor. Per-unit blocks via tree-sitter-python 0.25
|
/// Python AST extractor. Per-unit blocks via tree-sitter-python 0.25
|
||||||
@@ -159,35 +161,6 @@ impl Extractor for PythonAstExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filename_from_workspace_path(p: &str) -> String {
|
|
||||||
p.rsplit('/').next().unwrap_or(p).to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn strip_extension(filename: &str) -> String {
|
|
||||||
match filename.rfind('.') {
|
|
||||||
Some(0) => filename.to_string(),
|
|
||||||
Some(idx) => filename[..idx].to_string(),
|
|
||||||
None => filename.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Join (mod_prefix, mod_path, name) into a dotted Python symbol.
|
|
||||||
///
|
|
||||||
/// Empty `mod_prefix` (e.g. file is `__init__.py` at workspace root)
|
|
||||||
/// drops the leading prefix segment; empty `mod_path` (file top-level)
|
|
||||||
/// drops the class-nesting middle.
|
|
||||||
fn join_symbol(mod_prefix: &str, mod_path: &[String], name: &str) -> String {
|
|
||||||
let mut parts: Vec<&str> = Vec::with_capacity(mod_path.len() + 2);
|
|
||||||
if !mod_prefix.is_empty() {
|
|
||||||
parts.push(mod_prefix);
|
|
||||||
}
|
|
||||||
for p in mod_path {
|
|
||||||
parts.push(p.as_str());
|
|
||||||
}
|
|
||||||
parts.push(name);
|
|
||||||
parts.join(".")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_blocks(
|
fn build_blocks(
|
||||||
source: &str,
|
source: &str,
|
||||||
doc_id: &kebab_core::DocumentId,
|
doc_id: &kebab_core::DocumentId,
|
||||||
@@ -446,14 +419,14 @@ mod tests {
|
|||||||
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.Outer.Inner.helper"));
|
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.Outer.Inner.helper"));
|
||||||
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.with_decorator"));
|
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.with_decorator"));
|
||||||
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.<top-level>"));
|
assert!(syms.iter().any(|s| s == "kebab_eval.metrics.<top-level>"));
|
||||||
// The `@staticmethod` decorator on `free` is folded into its
|
// The `@no_type_check` decorator on `free` is folded into its
|
||||||
// unit's line range (decorated_definition unwrap).
|
// unit's line range (decorated_definition unwrap).
|
||||||
let free_src = doc.blocks.iter().find_map(|b| match b {
|
let free_src = doc.blocks.iter().find_map(|b| match b {
|
||||||
Block::Code(c) if matches!(&c.common.source_span,
|
Block::Code(c) if matches!(&c.common.source_span,
|
||||||
SourceSpan::Code{symbol,..} if symbol.as_deref()==Some("kebab_eval.metrics.free")) => Some(c.code.clone()),
|
SourceSpan::Code{symbol,..} if symbol.as_deref()==Some("kebab_eval.metrics.free")) => Some(c.code.clone()),
|
||||||
_ => None,
|
_ => None,
|
||||||
}).unwrap();
|
}).unwrap();
|
||||||
assert!(free_src.contains("@staticmethod"), "decorator folded in: {free_src}");
|
assert!(free_src.contains("@no_type_check"), "decorator folded in: {free_src}");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ use kebab_core::{
|
|||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
|
use crate::scaffold::{filename_from_workspace_path, strip_extension};
|
||||||
|
|
||||||
pub const PARSER_VERSION: &str = "code-rust-v1";
|
pub const PARSER_VERSION: &str = "code-rust-v1";
|
||||||
|
|
||||||
/// Rust AST extractor. Per-unit blocks via tree-sitter-rust 0.24
|
/// Rust AST extractor. Per-unit blocks via tree-sitter-rust 0.24
|
||||||
@@ -162,18 +164,6 @@ impl Extractor for RustAstExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filename_from_workspace_path(p: &str) -> String {
|
|
||||||
p.rsplit('/').next().unwrap_or(p).to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn strip_extension(filename: &str) -> String {
|
|
||||||
match filename.rfind('.') {
|
|
||||||
Some(0) => filename.to_string(),
|
|
||||||
Some(idx) => filename[..idx].to_string(),
|
|
||||||
None => filename.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_blocks(
|
fn build_blocks(
|
||||||
source: &str,
|
source: &str,
|
||||||
doc_id: &kebab_core::DocumentId,
|
doc_id: &kebab_core::DocumentId,
|
||||||
|
|||||||
45
crates/kebab-parse-code/src/scaffold.rs
Normal file
45
crates/kebab-parse-code/src/scaffold.rs
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
//! `kebab-parse-code::scaffold` — shared pure helpers used by all
|
||||||
|
//! per-language extractor modules.
|
||||||
|
//!
|
||||||
|
//! These are `pub(crate)` utilities extracted from the four extractor
|
||||||
|
//! modules (rust / python / typescript / javascript) where identical
|
||||||
|
//! copies existed. This module is now their single source of truth.
|
||||||
|
|
||||||
|
/// Extract the last path component (filename) from a `/`-separated
|
||||||
|
/// workspace path string.
|
||||||
|
/// For a path like `crates/x/src/foo.rs` this returns `foo.rs`.
|
||||||
|
pub(crate) fn filename_from_workspace_path(p: &str) -> String {
|
||||||
|
p.rsplit('/').next().unwrap_or(p).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Strip the last dot-extension from a filename string.
|
||||||
|
/// A leading dot (hidden-file convention) is preserved as-is.
|
||||||
|
/// `foo.rs` → `foo`, `.hidden` → `.hidden`, `noext` → `noext`.
|
||||||
|
pub(crate) fn strip_extension(filename: &str) -> String {
|
||||||
|
match filename.rfind('.') {
|
||||||
|
Some(0) => filename.to_string(),
|
||||||
|
Some(idx) => filename[..idx].to_string(),
|
||||||
|
None => filename.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Join `(mod_prefix, mod_path, name)` into a dotted symbol string.
|
||||||
|
///
|
||||||
|
/// Used by Python / TypeScript / JavaScript extractors. Rust uses
|
||||||
|
/// `::` separators instead and builds symbols inline; this helper
|
||||||
|
/// covers the `.`-joined languages.
|
||||||
|
///
|
||||||
|
/// Empty `mod_prefix` (e.g. file is `__init__.py` at workspace root)
|
||||||
|
/// drops the leading prefix segment; empty `mod_path` (file top-level)
|
||||||
|
/// drops the class-nesting middle segment.
|
||||||
|
pub(crate) fn join_symbol(mod_prefix: &str, mod_path: &[String], name: &str) -> String {
|
||||||
|
let mut parts: Vec<&str> = Vec::with_capacity(mod_path.len() + 2);
|
||||||
|
if !mod_prefix.is_empty() {
|
||||||
|
parts.push(mod_prefix);
|
||||||
|
}
|
||||||
|
for p in mod_path {
|
||||||
|
parts.push(p.as_str());
|
||||||
|
}
|
||||||
|
parts.push(name);
|
||||||
|
parts.join(".")
|
||||||
|
}
|
||||||
@@ -34,6 +34,8 @@ use kebab_core::{
|
|||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
|
use crate::scaffold::{filename_from_workspace_path, join_symbol, strip_extension};
|
||||||
|
|
||||||
pub const PARSER_VERSION: &str = "code-ts-v1";
|
pub const PARSER_VERSION: &str = "code-ts-v1";
|
||||||
|
|
||||||
/// TypeScript / TSX AST extractor. Per-unit blocks via
|
/// TypeScript / TSX AST extractor. Per-unit blocks via
|
||||||
@@ -181,36 +183,6 @@ fn select_grammar(workspace_path: &str) -> tree_sitter::Language {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filename_from_workspace_path(p: &str) -> String {
|
|
||||||
p.rsplit('/').next().unwrap_or(p).to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn strip_extension(filename: &str) -> String {
|
|
||||||
match filename.rfind('.') {
|
|
||||||
Some(0) => filename.to_string(),
|
|
||||||
Some(idx) => filename[..idx].to_string(),
|
|
||||||
None => filename.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Join (mod_prefix, mod_path, name) into a dotted TS symbol.
|
|
||||||
///
|
|
||||||
/// Note: TS uses `.` as the join separator between mod_prefix /
|
|
||||||
/// class-nesting / leaf — even though `mod_prefix` itself may contain
|
|
||||||
/// `/` (e.g. `src/search/Retriever`), the JOIN between segments stays
|
|
||||||
/// `.`. So a class method symbol looks like `src/search/Foo.search`.
|
|
||||||
fn join_symbol(mod_prefix: &str, mod_path: &[String], name: &str) -> String {
|
|
||||||
let mut parts: Vec<&str> = Vec::with_capacity(mod_path.len() + 2);
|
|
||||||
if !mod_prefix.is_empty() {
|
|
||||||
parts.push(mod_prefix);
|
|
||||||
}
|
|
||||||
for p in mod_path {
|
|
||||||
parts.push(p.as_str());
|
|
||||||
}
|
|
||||||
parts.push(name);
|
|
||||||
parts.join(".")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_blocks(
|
fn build_blocks(
|
||||||
source: &str,
|
source: &str,
|
||||||
doc_id: &kebab_core::DocumentId,
|
doc_id: &kebab_core::DocumentId,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import os
|
|||||||
|
|
||||||
ANSWER = 42
|
ANSWER = 42
|
||||||
|
|
||||||
@staticmethod
|
@no_type_check
|
||||||
def free(x):
|
def free(x):
|
||||||
"""free fn."""
|
"""free fn."""
|
||||||
return x + 1
|
return x + 1
|
||||||
|
|||||||
@@ -26,6 +26,16 @@ git history.
|
|||||||
|
|
||||||
**cross-link**: `tasks/p10/p10-1b-py-ts-js-ast-chunkers.md` Risks / notes 섹션, design §3.4.
|
**cross-link**: `tasks/p10/p10-1b-py-ts-js-ast-chunkers.md` Risks / notes 섹션, design §3.4.
|
||||||
|
|
||||||
|
## 2026-05-20 — p10-1B: module_path_for_python / _tsjs do not sanitize non-ASCII / 공백 / 특수문자 in workspace path
|
||||||
|
|
||||||
|
**동작**: `module_path_for_python` 와 `module_path_for_tsjs` 가 workspace path 의 비-ASCII / 공백 / 따옴표 / 백슬래시 같은 특수문자를 그대로 prefix 에 통과시킨다. 예: `kebab eval/metrics.py` (공백 포함) → module prefix `kebab eval.metrics` — 라이브러리 코드는 동작하지만 symbol 텍스트에 공백이 들어간다.
|
||||||
|
|
||||||
|
**이유**: 1B 1차 단순화. 대다수 코드 베이스가 ASCII identifier + `/` 구분자만 사용하므로 사용자 경험상 영향 미미.
|
||||||
|
|
||||||
|
**해결**: 후속 phase 에서 path-sanitize 추가 검토. NFKC normalize 후 `[^A-Za-z0-9_.\-/]` → `_` 변환 식. 적용 시 chunker_version bump 트리거 (re-ingest cascade 필요).
|
||||||
|
|
||||||
|
**cross-link**: `tasks/p10/p10-1b-py-ts-js-ast-chunkers.md` Risks / notes 섹션 line 55.
|
||||||
|
|
||||||
## 2026-05-20 — p10-1B: expression-level functions (arrow fn, function expression assigned to const) NOT emitted as units in 1B 1차
|
## 2026-05-20 — p10-1B: expression-level functions (arrow fn, function expression assigned to const) NOT emitted as units in 1B 1차
|
||||||
|
|
||||||
**무엇이 바뀌었나**: TypeScript / JavaScript 의 `const foo = () => {...}` 또는 `const bar = function() {...}` 같은 expression-level 함수 할당은 `code-ts-ast-v1` / `code-js-ast-v1` 에서 독립 unit 으로 방출되지 않는다. 해당 코드는 가장 가까운 surrounding declaration-level unit (또는 `<top-level>` glue) 에 흡수된다.
|
**무엇이 바뀌었나**: TypeScript / JavaScript 의 `const foo = () => {...}` 또는 `const bar = function() {...}` 같은 expression-level 함수 할당은 `code-ts-ast-v1` / `code-js-ast-v1` 에서 독립 unit 으로 방출되지 않는다. 해당 코드는 가장 가까운 surrounding declaration-level unit (또는 `<top-level>` glue) 에 흡수된다.
|
||||||
|
|||||||
@@ -57,3 +57,4 @@
|
|||||||
- 머지 후 deviation 은 `tasks/HOTFIXES.md` 에 dated 로그 + 본 spec `Risks / notes` 에 one-line cross-link.
|
- 머지 후 deviation 은 `tasks/HOTFIXES.md` 에 dated 로그 + 본 spec `Risks / notes` 에 one-line cross-link.
|
||||||
- **[HOTFIXES 2026-05-20]** Rust 1A-2 symbol 은 file-scope nesting 만 (workspace prefix 없음); 1B 의 Python/TypeScript/JavaScript 와 비일관 — retrofit 은 사용자 명시 요청 시. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-20, "Rust 1A-2 symbol path").
|
- **[HOTFIXES 2026-05-20]** Rust 1A-2 symbol 은 file-scope nesting 만 (workspace prefix 없음); 1B 의 Python/TypeScript/JavaScript 와 비일관 — retrofit 은 사용자 명시 요청 시. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-20, "Rust 1A-2 symbol path").
|
||||||
- **[HOTFIXES 2026-05-20]** TypeScript/JavaScript 의 expression-level 함수 (`const foo = () => {}` 등) 는 `<top-level>` glue 로 처리됨, 독립 unit 미방출 — 후속 phase 에서 `lexical_declaration` unwrap 검토. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-20, "expression-level functions").
|
- **[HOTFIXES 2026-05-20]** TypeScript/JavaScript 의 expression-level 함수 (`const foo = () => {}` 등) 는 `<top-level>` glue 로 처리됨, 독립 unit 미방출 — 후속 phase 에서 `lexical_declaration` unwrap 검토. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-20, "expression-level functions").
|
||||||
|
- **[HOTFIXES 2026-05-20]** `module_path_for_python` / `module_path_for_tsjs` 가 path-sanitize 안 함 (특수문자/공백 그대로 prefix 에 들어감) — 후속 phase 에서 NFKC + 사용금지 문자 변환 검토. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-20, "module_path_for_python / _tsjs do not sanitize").
|
||||||
|
|||||||
Reference in New Issue
Block a user