44import multiprocessing as mp
55from collections import defaultdict
66from concurrent .futures import ThreadPoolExecutor , as_completed
7+ from datetime import datetime , timezone
78
89import fastremap
910import numpy as np
@@ -59,33 +60,47 @@ def _populate_cx_edges_with_timestamps(
5960 for all IDs involved in an edit, we can use the timestamps of
6061 when cross edges of children were updated.
6162 """
63+
64+ start = time .time ()
6265 global CX_EDGES
6366 attrs = [Connectivity .CrossChunkEdge [l ] for l in range (layer , cg .meta .layer_count )]
6467 all_children = np .concatenate (list (CHILDREN .values ()))
6568 response = cg .client .read_nodes (node_ids = all_children , properties = attrs )
6669 timestamps_d = get_parent_timestamps (cg , nodes )
6770 end_timestamps = get_end_timestamps (cg , nodes , nodes_ts , CHILDREN , layer = layer )
71+ logging .info (f"_populate_nodes_and_children init: { time .time () - start } " )
6872
69- rows = []
70- for node , node_ts , node_end_ts in zip (nodes , nodes_ts , end_timestamps ):
73+ start = time .time ()
74+ partners_map = {}
75+ for node , node_ts in zip (nodes , nodes_ts ):
7176 CX_EDGES [node ] = {}
72- timestamps = timestamps_d [node ]
7377 cx_edges_d_node_ts = _get_cx_edges_at_timestamp (node , response , node_ts )
74-
7578 edges = np .concatenate ([empty_2d ] + list (cx_edges_d_node_ts .values ()))
76- partner_parent_ts_d = get_parent_timestamps (cg , edges [:, 1 ])
77- for v in partner_parent_ts_d .values ():
78- timestamps .update (v )
79+ partners_map [node ] = edges [:, 1 ]
7980 CX_EDGES [node ][node_ts ] = cx_edges_d_node_ts
8081
82+ partners = np .unique (np .concatenate ([* partners_map .values ()]))
83+ partner_parent_ts_d = get_parent_timestamps (cg , partners )
84+ logging .info (f"get partners timestamps init: { time .time () - start } " )
85+
86+ rows = []
87+ for node , node_ts , node_end_ts in zip (nodes , nodes_ts , end_timestamps ):
88+ timestamps = timestamps_d [node ]
89+ for partner in partners_map [node ]:
90+ timestamps .update (partner_parent_ts_d [partner ])
91+
92+ is_stale = node_end_ts is not None
93+ node_end_ts = node_end_ts or datetime .now (timezone .utc )
8194 for ts in sorted (timestamps ):
8295 if ts > node_end_ts :
8396 break
8497 CX_EDGES [node ][ts ] = _get_cx_edges_at_timestamp (node , response , ts )
8598
86- row_id = serializers .serialize_uint64 (node )
87- val_dict = {Hierarchy .StaleTimeStamp : 0 }
88- rows .append (cg .client .mutate_row (row_id , val_dict , time_stamp = node_end_ts ))
99+ if is_stale :
100+ row_id = serializers .serialize_uint64 (node )
101+ val_dict = {Hierarchy .StaleTimeStamp : 0 }
102+ rows .append (cg .client .mutate_row (row_id , val_dict , time_stamp = node_end_ts ))
103+
89104 cg .client .write (rows )
90105
91106
@@ -140,7 +155,6 @@ def _update_cross_edges_helper(args):
140155 futures = [executor .submit (_update_cross_edges_helper_thread , task ) for task in tasks ]
141156 for future in tqdm (as_completed (futures ), total = len (futures )):
142157 rows .extend (future .result ())
143-
144158 cg .client .write (rows )
145159
146160
@@ -154,13 +168,21 @@ def update_chunk(
154168 start = time .time ()
155169 x , y , z = chunk_coords
156170 chunk_id = cg .get_chunk_id (layer = layer , x = x , y = y , z = z )
171+
157172 _populate_nodes_and_children (cg , chunk_id , nodes = nodes )
173+ logging .info (f"_populate_nodes_and_children: { time .time () - start } " )
158174 if not CHILDREN :
159175 return
160176 nodes = list (CHILDREN .keys ())
161177 random .shuffle (nodes )
178+
179+ start = time .time ()
162180 nodes_ts = cg .get_node_timestamps (nodes , return_numpy = False , normalize = True )
181+ logging .info (f"get_node_timestamps: { time .time () - start } " )
182+
183+ start = time .time ()
163184 _populate_cx_edges_with_timestamps (cg , layer , nodes , nodes_ts )
185+ logging .info (f"_populate_cx_edges_with_timestamps: { time .time () - start } " )
164186
165187 if debug :
166188 rows = []