Skip to content

Commit fd76ecf

Browse files
tstackttiimm
authored and committed
[perf] add a map of file name to source ID instead of scanning all files
1 parent 1e7a6cf commit fd76ecf

1 file changed

Lines changed: 58 additions & 13 deletions

File tree

src/lib.rs

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,26 @@ impl StatementsInFile {
202202
.collect::<Vec<&str>>();
203203
self.matcher = RegexSet::new(&patterns).ok();
204204
}
205+
206+
/// Builds the `(file name, source file ID)` entry for this file that is used to
/// populate a name-to-source-ID lookup map.
///
/// The file name is capture group 1 of `PATH_TO_NAME_REGEX` applied to `self.path`,
/// i.e. the trailing run of non-separator characters that follows a `/` or `\`.
/// It is copied into an owned `String` and paired with `self.id`.
///
/// Returns `None` when the regex does not match `self.path`; since the pattern
/// requires a separator before the name, a path without any `/` or `\` yields `None`.
fn to_lookup_pair(&self) -> Option<(String, SourceFileID)> {
207+
PATH_TO_NAME_REGEX
208+
.captures(&self.path)
209+
.into_iter()
210+
.flat_map(|caps| caps.get(1))
211+
.map(|name_match| (name_match.as_str().to_owned(), self.id))
212+
.next()
213+
}
205214
}
206215

207216
/// Collection of individual source files under a root path
208217
#[derive(Serialize, Deserialize, Debug)]
209218
pub struct SourceTree {
210219
/// Hierarchy of the sources under the root; constructed from the root path.
pub tree: SourceHierTree,
211220
/// Per-file statement data, keyed by the ID of the source file.
pub files_with_statements: HashMap<SourceFileID, StatementsInFile>,
221+
/// Most log messages only carry the file name, so we keep an extra map from the file
222+
/// name to the IDs of the source files with that name to speed up matches.
///
/// This map is not serialized (`serde(skip)`); it is repopulated from the entries of
/// `files_with_statements` after a tree is decoded and whenever files are scanned.
223+
#[serde(skip)]
224+
pub file_name_to_sources: HashMap<String, Vec<SourceFileID>>,
212225
}
213226

214227
/// Collection of root paths to their tree of source files
@@ -270,6 +283,13 @@ impl LogMatcher {
270283
})?;
271284
for sif in decoded_root.files_with_statements.values_mut() {
272285
sif.try_creating_matcher();
286+
sif.to_lookup_pair().into_iter().for_each(|(name, sid)| {
287+
decoded_root
288+
.file_name_to_sources
289+
.entry(name)
290+
.or_default()
291+
.push(sid);
292+
});
273293
}
274294
Ok(decoded_root)
275295
}
@@ -385,6 +405,7 @@ impl LogMatcher {
385405
.or_insert_with(|| SourceTree {
386406
tree: SourceHierTree::from(&path),
387407
files_with_statements: HashMap::new(),
408+
file_name_to_sources: HashMap::new(),
388409
});
389410
}
390411
Ok(())
@@ -459,13 +480,19 @@ impl LogMatcher {
459480
ScanEvent::DeletedFile(_path, id) => {
460481
retval.deleted += 1;
461482
coll.files_with_statements.remove(&id);
483+
coll.file_name_to_sources.values_mut().for_each(|ids| {
484+
ids.retain_mut(|elem| *elem != id);
485+
});
462486
None
463487
}
464488
})
465489
.collect::<Vec<CodeSource>>();
466490
extract_logging_guarded(&sources, &guard)
467491
.into_iter()
468492
.for_each(|sif| {
493+
sif.to_lookup_pair().into_iter().for_each(|(name, sid)| {
494+
coll.file_name_to_sources.entry(name).or_default().push(sid);
495+
});
469496
coll.files_with_statements.insert(sif.id, sif);
470497
});
471498
}
@@ -491,19 +518,34 @@ impl LogMatcher {
491518
..
492519
}) = log_ref.details
493520
{
494-
// XXX this block and the else are basically the same, try to refactor
495-
coll.files_with_statements
496-
.values()
497-
.filter(|stmts| stmts.path.contains(filename))
498-
.flat_map(|stmts| {
499-
let file_matches =
500-
stmts.matcher.as_ref().expect("have RegexSet").matches(body);
501-
match file_matches.iter().next() {
502-
None => None,
503-
Some(index) => stmts.log_statements.get(index),
504-
}
505-
})
506-
.collect::<Vec<&SourceRef>>()
521+
if let Some(sources) = coll.file_name_to_sources.get(filename) {
522+
sources
523+
.iter()
524+
.flat_map(|path| coll.files_with_statements.get(path))
525+
.flat_map(|stmts| {
526+
let file_matches =
527+
stmts.matcher.as_ref().expect("have RegexSet").matches(body);
528+
match file_matches.iter().next() {
529+
None => None,
530+
Some(index) => stmts.log_statements.get(index),
531+
}
532+
})
533+
.collect::<Vec<&SourceRef>>()
534+
} else {
535+
// XXX this fallback scan, the name-lookup branch above, and the outer else are basically the same, try to refactor
536+
coll.files_with_statements
537+
.values()
538+
.filter(|stmts| stmts.path.contains(filename))
539+
.flat_map(|stmts| {
540+
let file_matches =
541+
stmts.matcher.as_ref().expect("have RegexSet").matches(body);
542+
match file_matches.iter().next() {
543+
None => None,
544+
Some(index) => stmts.log_statements.get(index),
545+
}
546+
})
547+
.collect::<Vec<&SourceRef>>()
548+
}
507549
} else {
508550
coll.files_with_statements
509551
.par_iter()
@@ -590,6 +632,9 @@ static PYTHON_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
590632
Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]"#).unwrap()
591633
});
592634

635+
/// Extracts the file name from a path: capture group 1 is the run of characters
/// at the end of the string that contains no `/` or `\` and is preceded by one
/// of those separators. Strings without a separator, or ending in one, do not match.
///
/// Compiled lazily on first use; the `unwrap` is applied to a fixed literal pattern.
static PATH_TO_NAME_REGEX: LazyLock<Regex> =
636+
LazyLock::new(|| Regex::new(r#"[/\\]([^/\\]+)$"#).unwrap());
637+
593638
static BACKTRACE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
594639
Regex::new(
595640
r#"(?smx)

0 commit comments

Comments
 (0)