22import logging
33from multiprocessing import cpu_count
44from multiprocessing .pool import Pool
5- from typing import Dict , Iterable , List , Optional , Tuple
5+ from typing import Dict , Iterable , List , Optional , Tuple , Set
66
77from find_compatible_dts import FindCompatibleDTS
88
# References parsing output, ident -> (file_idx, family) -> list of lines
RefsDict = Dict[bytes, Dict[Tuple[int, str], List[int]]]

# Generic dictionary of ident -> list of lines
LinesListDict = Dict[str, List[int]]

# Blob index -> (git blob hash, basename, True if first seen in this tag)
IdxCache = Dict[int, Tuple[bytes, str, bool]]
# Check if definition for ident is visible in current version
def def_in_version(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, ident: bytes) -> bool:
    """Return True if `ident` has at least one definition in a blob of this version.

    `def_cache` memoizes positive answers for the current version so repeated
    lookups of the same ident skip the database scan.
    """
    # Fast path: already proven visible during this version.
    if ident in def_cache:
        return True

    entries = db.defs.get(ident)
    if not entries:
        return False

    # Visible iff any definition lives in a blob that belongs to this version.
    visible = any(file_idx in idx_to_hash_and_filename
                  for file_idx, _, _, _ in entries.iter())
    if visible:
        def_cache.add(ident)
    return visible
53+
# Add definitions to database
def add_defs(db: DB, defs: DefsDict):
    """Merge collected definition occurrences into the defs database.

    For each ident, appends all (file_idx, def_type, line, family) occurrences
    to the existing DefList (creating one if the ident is new) and writes the
    result back.
    """
    for ident, occ_list in defs.items():
        obj = db.defs.get(ident)
        if obj is None:
            obj = DefList()

        # `def_type` avoids shadowing the builtin `type`.
        for (idx, def_type, line, family) in occ_list:
            obj.append(idx, def_type, line, family)

        db.defs.put(ident, obj)
5665
# Add references to database
def add_refs(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, refs: RefsDict):
    """Merge collected references into the refs database.

    References to idents with no definition visible in the current version are
    skipped entirely; reference lines that coincide with a definition position
    are filtered out. `db.refs` is only written for idents that gained lines.
    """
    for ident, idx_to_lines in refs.items():
        deflist = db.defs.get(ident)
        # Skip idents with no definitions at all, or none visible in this
        # version. (When deflist is None, def_in_version would return False
        # anyway, so short-circuiting skips a redundant database hit.)
        if deflist is None or not def_in_version(db, def_cache, idx_to_hash_and_filename, ident):
            continue

        # Collect all definition (file_idx, line) positions once, instead of
        # rescanning the whole deflist for every single reference line.
        def_positions = {(def_idx, def_line)
                         for def_idx, _, def_line, _ in deflist.iter()}

        obj = db.refs.get(ident)
        if obj is None:
            obj = RefList()

        modified = False
        for (idx, family), lines in idx_to_lines.items():
            # A line that holds the definition itself is not a reference.
            kept = [n for n in lines if (idx, n) not in def_positions]

            if len(kept) != 0:
                lines_str = ','.join((str(n) for n in kept))
                obj.append(idx, lines_str, family)
                modified = True

        # Avoid a database write when nothing was appended for this ident.
        if modified:
            db.refs.put(ident, obj)
8396
8497# Add documentation references to database
8598def add_docs (db : DB , idx : int , family : str , docs : Dict [str , List [int ]]):
@@ -111,7 +124,7 @@ def add_to_lineslist(db_file: BsdDB, idx: int, family: str, to_add: Dict[str, Li
111124
112125
113126# Adds blob list to database, returns blob id -> (hash, filename, is_new) dict
114- def collect_blobs (db : DB , tag : bytes ) -> Dict [ int , Tuple [ bytes , str ]] :
127+ def collect_blobs (db : DB , tag : bytes ) -> IdxCache :
115128 idx = db .vars .get ('numBlobs' )
116129 if idx is None :
117130 idx = 0
@@ -126,11 +139,14 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
126139 hash , path = blob .split (b' ' ,maxsplit = 1 )
127140 filename = os .path .basename (path .decode ())
128141 blob_idx = db .blob .get (hash )
142+
129143 if blob_idx is not None :
130144 versionBuf .append ((blob_idx , path ))
145+ if blob_idx not in idx_to_hash_and_filename :
146+ idx_to_hash_and_filename [blob_idx ] = (hash , filename , False )
131147 else :
132148 versionBuf .append ((idx , path ))
133- idx_to_hash_and_filename [idx ] = (hash , filename )
149+ idx_to_hash_and_filename [idx ] = (hash , filename , True )
134150 db .blob .put (hash , idx )
135151 db .hash .put (idx , hash )
136152 db .file .put (idx , filename )
@@ -271,18 +287,17 @@ def get_comps_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
271287# Update a single version - collects data from all the stages and saves it in the database
272288def update_version (db : DB , tag : bytes , pool : Pool , dts_comp_support : bool ):
273289 idx_to_hash_and_filename = collect_blobs (db , tag )
274- def_cache = {}
275290
276291 # Collect blobs to process and split list of blobs into chunks
277- idxes = [(idx , hash , filename ) for (idx , (hash , filename )) in idx_to_hash_and_filename .items ()]
292+ idxes = [(idx , hash , filename ) for (idx , (hash , filename , new )) in idx_to_hash_and_filename .items () if new ]
278293 chunksize = int (len (idxes ) / cpu_count ())
279294 chunksize = min (max (1 , chunksize ), 100 )
280295
281296 logger .info ("collecting blobs done" )
282297
283298 for result in pool .imap_unordered (get_defs , idxes , chunksize ):
284299 if result is not None :
285- add_defs (db , def_cache , result )
300+ add_defs (db , result )
286301
287302 logger .info ("defs done" )
288303
@@ -305,16 +320,16 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
305320
306321 logger .info ("dts comps docs done" )
307322
323+ def_cache = set ()
308324 for result in pool .imap_unordered (get_refs , idxes , chunksize ):
309325 if result is not None :
310- add_refs (db , def_cache , result )
326+ add_refs (db , def_cache , idx_to_hash_and_filename , result )
311327
312328 logger .info ("refs done" )
313329
314330 generate_defs_caches (db )
315331 logger .info ("update done" )
316332
317-
318333if __name__ == "__main__" :
319334 dts_comp_support = bool (int (script ('dts-comp' )))
320335 db = None
0 commit comments