From 6f92059a3d9d114af330c05b313f94797aa3fa37 Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Wed, 6 May 2026 12:44:42 +0200 Subject: [PATCH] fix(scan-result-shaping): remove root directory entry when --strip-root is set When --strip-root is used, Provenant now excludes the root directory entry from the files array, matching ScanCode's skip_root=True behavior. Previously, the root entry was kept with its path shortened to just the basename, which caused duplicate path entries when a child directory shared the root's name (e.g. project/project/inner.txt). Single-resource scans (one file or one empty directory) preserve the root entry, consistent with ScanCode's has_single_resource exception. Signed-off-by: Adrian Braemer --- src/post_processing/test_utils.rs | 11 +--- src/scan_result_shaping/core_test.rs | 89 ++++++++++++++++++++++++++++ src/scan_result_shaping/mod.rs | 12 +++- 3 files changed, 101 insertions(+), 11 deletions(-) diff --git a/src/post_processing/test_utils.rs b/src/post_processing/test_utils.rs index 31f30a425..3d4f2c01a 100644 --- a/src/post_processing/test_utils.rs +++ b/src/post_processing/test_utils.rs @@ -424,7 +424,7 @@ pub(crate) fn strip_root_prefix_for_test(path: &Path, root: &Path) -> Option, scan_root: &str) { normalize_paths(files, scan_root, true, false); } @@ -488,15 +488,6 @@ pub(crate) fn compute_fixture_output( .to_str() .expect("fixture path should be UTF-8"), ); - if let Some(root_name) = resolved_scan_root - .scan_root - .file_name() - .and_then(|name| name.to_str()) - .filter(|name| !name.is_empty()) - && !files.iter().any(|file| file.path == root_name) - { - files.push(dir(root_name)); - } let mut assembly_result = assembly::assemble(&mut files); for package in &mut assembly_result.packages { package.backfill_license_provenance(); diff --git a/src/scan_result_shaping/core_test.rs b/src/scan_result_shaping/core_test.rs index 3bfc28199..31b6a219c 100644 --- a/src/scan_result_shaping/core_test.rs +++ 
b/src/scan_result_shaping/core_test.rs @@ -938,3 +938,92 @@ fn normalize_top_level_output_paths_only_applies_strip_root() { assert_eq!(packages[0].datafile_paths, vec!["package.json"]); assert_eq!(dependencies[0].datafile_path, "package.json"); } + +#[test] +fn normalize_paths_strip_root_removes_root_directory_entry() { + let mut files = vec![ + dir("project"), + dir("project/src"), + file("project/src/main.rs"), + file("project/README.md"), + ]; + normalize_paths(&mut files, "project", true, false); + + let paths: Vec<&str> = files.iter().map(|f| f.path.as_str()).collect(); + assert!( + !paths.contains(&"project"), + "root directory entry should be removed with --strip-root, got: {paths:?}" + ); + assert!(paths.contains(&"src"), "child directory should remain"); + assert!(paths.contains(&"src/main.rs"), "child file should remain"); + assert!( + paths.contains(&"README.md"), + "root-level file should remain" + ); +} + +#[test] +fn normalize_paths_strip_root_keeps_root_entry_for_single_file_scan() { + let mut files = vec![file("project/README.md")]; + normalize_paths(&mut files, "project/README.md", true, false); + + let paths: Vec<&str> = files.iter().map(|f| f.path.as_str()).collect(); + assert!( + paths.contains(&"README.md"), + "single file entry should remain with --strip-root, got: {paths:?}" + ); +} + +#[test] +fn normalize_paths_strip_root_keeps_root_entry_for_single_directory_scan() { + let mut files = vec![dir("project")]; + normalize_paths(&mut files, "project", true, false); + + let paths: Vec<&str> = files.iter().map(|f| f.path.as_str()).collect(); + assert_eq!( + paths, + vec!["project"], + "single directory root should remain with --strip-root, got: {paths:?}" + ); +} + +#[test] +fn normalize_paths_strip_root_removes_root_but_keeps_same_named_child() { + let mut files = vec![ + dir("project"), + dir("project/project"), + file("project/project/inner.txt"), + ]; + normalize_paths(&mut files, "project", true, false); + + let paths: Vec<&str> = 
files.iter().map(|f| f.path.as_str()).collect(); + assert_eq!( + paths.len(), + 2, + "should have exactly 2 entries (child dir + file), got: {paths:?}" + ); + assert!( + paths.contains(&"project"), + "child directory named 'project' should remain after root is removed" + ); + assert!( + paths.contains(&"project/inner.txt"), + "file under same-named child should remain" + ); +} + +#[test] +fn normalize_paths_without_strip_root_keeps_root_directory_entry() { + let mut files = vec![ + dir("project"), + dir("project/src"), + file("project/src/main.rs"), + ]; + normalize_paths(&mut files, "project", false, false); + + let paths: Vec<&str> = files.iter().map(|f| f.path.as_str()).collect(); + assert!( + paths.contains(&"project"), + "root directory entry should be kept without --strip-root" + ); +} diff --git a/src/scan_result_shaping/mod.rs b/src/scan_result_shaping/mod.rs index e0a9c0724..44c21a89b 100644 --- a/src/scan_result_shaping/mod.rs +++ b/src/scan_result_shaping/mod.rs @@ -540,11 +540,21 @@ fn normalize_ignorable_value(value: &str, trim_slashes: bool) -> String { } pub(crate) fn normalize_paths( - files: &mut [FileInfo], + files: &mut Vec<FileInfo>, scan_root: &str, strip_root: bool, full_root: bool, ) { + if strip_root { + let root_is_directory = files.iter().any(|entry| { + entry.path == scan_root && entry.file_type == crate::models::FileType::Directory + }); + let has_single_resource = files.len() <= 1; + if root_is_directory && !has_single_resource { + files.retain(|entry| entry.path != scan_root); + } + } + for entry in files.iter_mut() { if let Some(normalized_path) = normalize_path_value(&entry.path, scan_root, strip_root, full_root)