@@ -202,13 +202,26 @@ impl StatementsInFile {
202202 . collect :: < Vec < & str > > ( ) ;
203203 self . matcher = RegexSet :: new ( & patterns) . ok ( ) ;
204204 }
205+
206+ fn to_lookup_pair ( & self ) -> Option < ( String , SourceFileID ) > {
207+ PATH_TO_NAME_REGEX
208+ . captures ( & self . path )
209+ . into_iter ( )
210+ . flat_map ( |caps| caps. get ( 1 ) )
211+ . map ( |name_match| ( name_match. as_str ( ) . to_owned ( ) , self . id ) )
212+ . next ( )
213+ }
205214}
206215
207216/// Collection of individual source files under a root path
208217#[ derive( Serialize , Deserialize , Debug ) ]
209218pub struct SourceTree {
210219 pub tree : SourceHierTree ,
211220 pub files_with_statements : HashMap < SourceFileID , StatementsInFile > ,
221+ /// Most log statements only have the file name, so we keep an extra map from the name
222+ /// to the source file IDs to speed up matches.
223+ #[ serde( skip) ]
224+ pub file_name_to_sources : HashMap < String , Vec < SourceFileID > > ,
212225}
213226
214227/// Collection of root paths to their tree of source files
@@ -270,6 +283,13 @@ impl LogMatcher {
270283 } ) ?;
271284 for sif in decoded_root. files_with_statements . values_mut ( ) {
272285 sif. try_creating_matcher ( ) ;
286+ sif. to_lookup_pair ( ) . into_iter ( ) . for_each ( |( name, sid) | {
287+ decoded_root
288+ . file_name_to_sources
289+ . entry ( name)
290+ . or_default ( )
291+ . push ( sid) ;
292+ } ) ;
273293 }
274294 Ok ( decoded_root)
275295 }
@@ -385,6 +405,7 @@ impl LogMatcher {
385405 . or_insert_with ( || SourceTree {
386406 tree : SourceHierTree :: from ( & path) ,
387407 files_with_statements : HashMap :: new ( ) ,
408+ file_name_to_sources : HashMap :: new ( ) ,
388409 } ) ;
389410 }
390411 Ok ( ( ) )
@@ -459,13 +480,19 @@ impl LogMatcher {
459480 ScanEvent :: DeletedFile ( _path, id) => {
460481 retval. deleted += 1 ;
461482 coll. files_with_statements . remove ( & id) ;
483+ coll. file_name_to_sources . values_mut ( ) . for_each ( |ids| {
484+ ids. retain_mut ( |elem| * elem != id) ;
485+ } ) ;
462486 None
463487 }
464488 } )
465489 . collect :: < Vec < CodeSource > > ( ) ;
466490 extract_logging_guarded ( & sources, & guard)
467491 . into_iter ( )
468492 . for_each ( |sif| {
493+ sif. to_lookup_pair ( ) . into_iter ( ) . for_each ( |( name, sid) | {
494+ coll. file_name_to_sources . entry ( name) . or_default ( ) . push ( sid) ;
495+ } ) ;
469496 coll. files_with_statements . insert ( sif. id , sif) ;
470497 } ) ;
471498 }
@@ -491,19 +518,34 @@ impl LogMatcher {
491518 ..
492519 } ) = log_ref. details
493520 {
494- // XXX this block and the else are basically the same, try to refactor
495- coll. files_with_statements
496- . values ( )
497- . filter ( |stmts| stmts. path . contains ( filename) )
498- . flat_map ( |stmts| {
499- let file_matches =
500- stmts. matcher . as_ref ( ) . expect ( "have RegexSet" ) . matches ( body) ;
501- match file_matches. iter ( ) . next ( ) {
502- None => None ,
503- Some ( index) => stmts. log_statements . get ( index) ,
504- }
505- } )
506- . collect :: < Vec < & SourceRef > > ( )
521+ if let Some ( sources) = coll. file_name_to_sources . get ( filename) {
522+ sources
523+ . iter ( )
524+ . flat_map ( |path| coll. files_with_statements . get ( path) )
525+ . flat_map ( |stmts| {
526+ let file_matches =
527+ stmts. matcher . as_ref ( ) . expect ( "have RegexSet" ) . matches ( body) ;
528+ match file_matches. iter ( ) . next ( ) {
529+ None => None ,
530+ Some ( index) => stmts. log_statements . get ( index) ,
531+ }
532+ } )
533+ . collect :: < Vec < & SourceRef > > ( )
534+ } else {
535+ // XXX this block and the else are basically the same, try to refactor
536+ coll. files_with_statements
537+ . values ( )
538+ . filter ( |stmts| stmts. path . contains ( filename) )
539+ . flat_map ( |stmts| {
540+ let file_matches =
541+ stmts. matcher . as_ref ( ) . expect ( "have RegexSet" ) . matches ( body) ;
542+ match file_matches. iter ( ) . next ( ) {
543+ None => None ,
544+ Some ( index) => stmts. log_statements . get ( index) ,
545+ }
546+ } )
547+ . collect :: < Vec < & SourceRef > > ( )
548+ }
507549 } else {
508550 coll. files_with_statements
509551 . par_iter ( )
@@ -590,6 +632,9 @@ static PYTHON_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
590632 Regex :: new ( r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]"# ) . unwrap ( )
591633} ) ;
592634
635+ static PATH_TO_NAME_REGEX : LazyLock < Regex > =
636+ LazyLock :: new ( || Regex :: new ( r#"[/\\]([^/\\]+)$"# ) . unwrap ( ) ) ;
637+
593638static BACKTRACE_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
594639 Regex :: new (
595640 r#"(?smx)
0 commit comments