Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions src/license_detection/detection/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use crate::license_detection::expression::{
};
use crate::license_detection::models::{LicenseMatch, MatcherKind};
use crate::utils::spdx::{
ExpressionRelation, combine_license_expressions_preserving_structure,
combine_license_expressions_with_relation_preserving_structure,
ExpressionRelation, combine_license_expressions_preserving_structure_strict,
combine_license_expressions_with_relation_preserving_structure_strict,
};

/// Coverage value below which detections are not perfect.
Expand Down Expand Up @@ -463,8 +463,10 @@ pub fn determine_spdx_expression(
let expressions = expressions
.ok_or_else(|| "Missing SPDX expressions for one or more matches".to_string())?;

combine_license_expressions_preserving_structure(expressions.into_iter().map(str::to_string))
.ok_or_else(|| "Failed to combine SPDX expressions".to_string())
combine_license_expressions_preserving_structure_strict(
expressions.into_iter().map(str::to_string),
)
.ok_or_else(|| "Failed to combine SPDX expressions".to_string())
}

fn determine_alternative_notice_expression(
Expand Down Expand Up @@ -528,11 +530,12 @@ fn determine_alternative_notice_spdx_expression(
let alternative_expressions = alternative_expressions.ok_or_else(|| {
"Missing SPDX expressions for one or more alternative-license matches".to_string()
})?;
let alternative_expression = combine_license_expressions_with_relation_preserving_structure(
alternative_expressions,
ExpressionRelation::Or,
)
.ok_or_else(|| "Failed to combine alternative SPDX expressions".to_string())?;
let alternative_expression =
combine_license_expressions_with_relation_preserving_structure_strict(
alternative_expressions,
ExpressionRelation::Or,
)
.ok_or_else(|| "Failed to combine alternative SPDX expressions".to_string())?;

let mut parts = vec![alternative_expression];
let supplemental_expressions: Option<Vec<String>> = supplemental
Expand All @@ -543,9 +546,12 @@ fn determine_alternative_notice_spdx_expression(
"Missing SPDX expressions for one or more supplemental matches".to_string()
})?);

combine_license_expressions_with_relation_preserving_structure(parts, ExpressionRelation::And)
.ok_or_else(|| "Failed to combine alternative SPDX expression parts".to_string())
.map(Some)
combine_license_expressions_with_relation_preserving_structure_strict(
parts,
ExpressionRelation::And,
)
.ok_or_else(|| "Failed to combine alternative SPDX expression parts".to_string())
.map(Some)
}

fn has_alternative_license_notice(matches: &[LicenseMatch], source_text: Option<&str>) -> bool {
Expand Down
120 changes: 119 additions & 1 deletion src/license_detection/detection/grouping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,83 @@ pub(super) fn group_matches_by_region_with_threshold(
groups.push(DetectionGroup::new(current_group));
}

groups
merge_sandwiched_same_line_clue_groups(groups)
}

/// Collapse every "sandwich" of three consecutive groups into a single group.
///
/// The groups are scanned left to right. Whenever the group at the current
/// position and the two groups after it satisfy
/// `should_merge_sandwiched_same_line_clue`, their matches are concatenated
/// (left, middle, right order) into one new `DetectionGroup` and the scan
/// resumes after the third group. Any other group is rebuilt on its own from
/// its matches.
///
/// The input vector is owned, so match lists are moved out of the incoming
/// groups instead of being cloned.
fn merge_sandwiched_same_line_clue_groups(groups: Vec<DetectionGroup>) -> Vec<DetectionGroup> {
    let mut groups = groups;
    let mut merged = Vec::with_capacity(groups.len());
    let mut index = 0;

    while index < groups.len() {
        let is_sandwich = index + 2 < groups.len()
            && should_merge_sandwiched_same_line_clue(
                &groups[index],
                &groups[index + 1],
                &groups[index + 2],
            );
        // Number of input groups consumed by this step.
        let span = if is_sandwich { 3 } else { 1 };

        // Move the match lists out of the consumed groups; the emptied groups
        // are never looked at again because `index` jumps past them.
        let mut combined_matches = std::mem::take(&mut groups[index].matches);
        for group in &mut groups[index + 1..index + span] {
            combined_matches.extend(std::mem::take(&mut group.matches));
        }

        merged.push(DetectionGroup::new(combined_matches));
        index += span;
    }

    merged
}

/// Report whether `middle` is a lone license clue sandwiched between `left`
/// and `right` such that the three groups qualify for merging.
///
/// All of the following must hold:
/// - `middle` contains exactly one match and that match is a license clue;
/// - neither `left` nor `right` contains any license clue;
/// - every match in all three groups passes
///   `all_matches_are_single_line_exact`;
/// - the three groups report the same `group_line_range`.
fn should_merge_sandwiched_same_line_clue(
    left: &DetectionGroup,
    middle: &DetectionGroup,
    right: &DetectionGroup,
) -> bool {
    // Exactly one match in the middle group, otherwise there is no sandwich.
    let [clue_match] = middle.matches.as_slice() else {
        return false;
    };
    if !clue_match.is_license_clue() {
        return false;
    }

    // The surrounding groups must be made of non-clue matches only.
    let contains_clue = |group: &DetectionGroup| {
        group
            .matches
            .iter()
            .any(|candidate| candidate.is_license_clue())
    };
    if contains_clue(left) || contains_clue(right) {
        return false;
    }

    let every_group_is_exact = [left, middle, right]
        .iter()
        .all(|group| all_matches_are_single_line_exact(&group.matches));
    if !every_group_is_exact {
        return false;
    }

    let middle_range = group_line_range(middle);
    group_line_range(left) == middle_range && middle_range == group_line_range(right)
}

/// Return `true` only when `matches` is non-empty and every match in it
/// starts and ends on the same line, reports a coverage of exactly `100.0`,
/// and was produced by the `Hash`, `SpdxId` or `Aho` matcher.
fn all_matches_are_single_line_exact(matches: &[LicenseMatch]) -> bool {
    // An empty slice never qualifies.
    if matches.is_empty() {
        return false;
    }

    for candidate in matches {
        if candidate.start_line != candidate.end_line {
            return false;
        }
        if candidate.coverage() != 100.0 {
            return false;
        }

        let produced_by_exact_matcher = matches!(
            candidate.matcher,
            crate::license_detection::models::MatcherKind::Hash
                | crate::license_detection::models::MatcherKind::SpdxId
                | crate::license_detection::models::MatcherKind::Aho
        );
        if !produced_by_exact_matcher {
            return false;
        }
    }

    true
}

/// Return the `(start_line, end_line)` pair taken from the first match's
/// `start_line` and the last match's `end_line`, or `None` when the group
/// holds no matches.
fn group_line_range(
    group: &DetectionGroup,
) -> Option<(crate::models::LineNumber, crate::models::LineNumber)> {
    let first_match = group.matches.first()?;
    let last_match = group.matches.last()?;

    Some((first_match.start_line, last_match.end_line))
}

/// Check if two matches should be in the same group based on line proximity.
Expand Down Expand Up @@ -308,6 +384,48 @@ mod tests {
assert_eq!(groups[1].matches, vec![clue]);
}

/// A clue on line 10 that sits between two non-clue matches on the same line
/// must end up in one group together with both neighbours, in input order.
#[test]
fn test_group_matches_merges_sandwiched_same_line_clue_between_exact_matches() {
    // Builds a fully covered "2-aho" match on line 10.
    let exact_match_on_line_10 = |rule_identifier: &str,
                                  rule_kind: crate::license_detection::models::RuleKind,
                                  license_expression: &str| {
        let mut license_match = create_test_match(10, 10, "2-aho", rule_identifier);
        license_match.rule_kind = rule_kind;
        license_match.license_expression = license_expression.to_string();
        license_match.match_coverage = 100.0;
        license_match
    };

    let expected = vec![
        exact_match_on_line_10(
            "linux-note.RULE",
            crate::license_detection::models::RuleKind::Reference,
            "linux-syscall-exception-gpl",
        ),
        exact_match_on_line_10(
            "gpl_bare_word_only.RULE",
            crate::license_detection::models::RuleKind::Clue,
            "gpl-1.0-plus",
        ),
        exact_match_on_line_10(
            "llgpl_3.RULE",
            crate::license_detection::models::RuleKind::Reference,
            "llgpl",
        ),
    ];

    let groups = group_matches_by_region(&expected);

    assert_eq!(groups.len(), 1);
    assert_eq!(groups[0].matches, expected);
}

/// When the left match is on line 10 but the clue and the right match are on
/// line 11, the three matches must stay in three separate groups.
#[test]
fn test_group_matches_does_not_merge_sandwiched_clue_across_different_lines() {
    // Builds a fully covered single-line "2-aho" match on the given line.
    let exact_match_on_line = |line,
                               rule_identifier: &str,
                               rule_kind: crate::license_detection::models::RuleKind| {
        let mut license_match = create_test_match(line, line, "2-aho", rule_identifier);
        license_match.rule_kind = rule_kind;
        license_match.match_coverage = 100.0;
        license_match
    };

    let input = [
        exact_match_on_line(
            10,
            "linux-note.RULE",
            crate::license_detection::models::RuleKind::Reference,
        ),
        exact_match_on_line(
            11,
            "gpl_bare_word_only.RULE",
            crate::license_detection::models::RuleKind::Clue,
        ),
        exact_match_on_line(
            11,
            "llgpl_3.RULE",
            crate::license_detection::models::RuleKind::Reference,
        ),
    ];

    let groups = group_matches_by_region(&input);

    assert_eq!(groups.len(), 3);
}

#[test]
fn test_sort_matches_by_line() {
let mut match1 = create_test_match(10, 15, "1-hash", "mit.LICENSE");
Expand Down
Loading
Loading