@@ -1547,8 +1547,11 @@ def __init__(self, group_key: str):
1547
1547
self .app_span_roots : List [SpanNode ] = None
1548
1548
# 用于存放 `app_span` 的所有 leaf
1549
1549
self .app_span_leafs : List [SpanNode ] = None
1550
+ # 记录叶子节点的 syscall_trace_id, 用以匹配 s-p root
1550
1551
self .leaf_syscall_trace_id_request : Set [int ] = set ()
1551
1552
self .leaf_syscall_trace_id_response : Set [int ] = set ()
1553
+ # 记录叶子节点的 x_request_id => index (in self.spans), 用以匹配 s-p root
1554
+ self .leaf_x_request_id : Dict [str , List [int ]] = {}
1552
1555
# 用于显示调用拓扑使用
1553
1556
self .subnet_id = None
1554
1557
self .subnet = None
@@ -1659,10 +1662,23 @@ def append_sys_span(self, sys_span: SysSpanNode):
1659
1662
self ._set_extra_value_for_sys_span (sys_span )
1660
1663
self ._set_auto_service (sys_span )
1661
1664
if sys_span .tap_side == TAP_SIDE_CLIENT_PROCESS :
1662
- self .leaf_syscall_trace_id_request .add (
1663
- sys_span .get_syscall_trace_id_request ())
1664
- self .leaf_syscall_trace_id_response .add (
1665
- sys_span .get_syscall_trace_id_response ())
1665
+ cp_syscall_trace_id_req = sys_span .get_syscall_trace_id_request ()
1666
+ cp_syscall_trace_id_res = sys_span .get_syscall_trace_id_response ()
1667
+ cp_x_request_id_0 = sys_span .get_x_request_id_0 ()
1668
+ cp_x_request_id_1 = sys_span .get_x_request_id_1 ()
1669
+ if cp_syscall_trace_id_req :
1670
+ self .leaf_syscall_trace_id_request .add (
1671
+ sys_span .get_syscall_trace_id_request ())
1672
+ if cp_syscall_trace_id_res :
1673
+ self .leaf_syscall_trace_id_response .add (
1674
+ sys_span .get_syscall_trace_id_response ())
1675
+ if cp_x_request_id_0 :
1676
+ # index of sys_span = len(self.spans)-1
1677
+ self .leaf_x_request_id .setdefault (
1678
+ cp_x_request_id_0 , []).append (len (self .spans ) - 1 )
1679
+ if cp_x_request_id_1 and cp_x_request_id_1 != cp_x_request_id_0 :
1680
+ self .leaf_x_request_id .setdefault (
1681
+ cp_x_request_id_1 , []).append (len (self .spans ) - 1 )
1666
1682
1667
1683
def remove_server_sys_span (self , sys_span : SysSpanNode ):
1668
1684
# 这里应该要做 append_sys_span 的逆操作(但对象仅为 ServerProcess sys_span)
@@ -1743,7 +1759,7 @@ def split_to_multiple_process_span_set(self) -> list:
1743
1759
# 极端情况下可能会有多个没有 parent_span_id 的入口,这里没法分辨它们的关系,不做拆分
1744
1760
if root_parent_span_id == '' :
1745
1761
root_parent_span_id = "root" # 只是标记 root_parent_span_id,没有实际作用
1746
- if root_parent_span_id not in split_result :
1762
+ if split_result . get ( root_parent_span_id , None ) is None :
1747
1763
newSet = ProcessSpanSet (root_parent_span_id )
1748
1764
newSet .app_span_roots = [self .spans [root_span_index ]]
1749
1765
newSet ._copy_meta_data_from (self )
@@ -1776,7 +1792,12 @@ def attach_sys_span_via_app_span(self, sys_span: SysSpanNode) -> bool:
1776
1792
return self ._attach_client_sys_span (sys_span )
1777
1793
1778
1794
def _attach_server_sys_span (self , sys_span : SysSpanNode ) -> bool :
1795
+ # connection priority: span_id > syscall_trace_id > x_request_id
1779
1796
span_id_of_sys_span = sys_span .get_span_id ()
1797
+ syscall_trace_id_request = sys_span .get_syscall_trace_id_request ()
1798
+ syscall_trace_id_response = sys_span .get_syscall_trace_id_response ()
1799
+ x_request_id_0 = sys_span .get_x_request_id_0 ()
1800
+ x_request_id_1 = sys_span .get_x_request_id_1 ()
1780
1801
if span_id_of_sys_span :
1781
1802
for app_root in self .app_span_roots :
1782
1803
if span_id_of_sys_span == app_root .get_parent_span_id ():
@@ -1815,10 +1836,9 @@ def _attach_server_sys_span(self, sys_span: SysSpanNode) -> bool:
1815
1836
"s-p sys_span mounted due to same span_id as parent" ,
1816
1837
self .mounted_callback )
1817
1838
return True
1818
- else :
1819
- syscall_trace_id_request = sys_span .get_syscall_trace_id_request ()
1820
- syscall_trace_id_response = sys_span .get_syscall_trace_id_response (
1821
- )
1839
+
1840
+ # span_id not matched, try syscall_trace_id
1841
+ if syscall_trace_id_request or syscall_trace_id_response :
1822
1842
for app_root in self .app_span_roots :
1823
1843
# 如果 span_id 不存在,说明可能是入口 span,上游没有注入 span_id,此时根据叶子节点 c-p 的 syscall_trace_id 匹配即可
1824
1844
# 这里匹配可以严格点,s-p 和 c-p 只会同侧(req-req / res-res)相等,避免误关联一个独立的 c-p
@@ -1830,6 +1850,36 @@ def _attach_server_sys_span(self, sys_span: SysSpanNode) -> bool:
1830
1850
"s-p sys_span mounted due to syscall_trace_id matched c-p" ,
1831
1851
self .mounted_callback )
1832
1852
return True
1853
+
1854
+ # span_id/syscall not matched, try x_request_id
1855
+ if x_request_id_0 or x_request_id_1 :
1856
+ # 场景:过 ingress/nginx 进入服务网关/服务,传递了 x_request_id,且作为首个 span 没有 trace_id/span_id
1857
+ # 且发生跨线程调度,无法基于 syscall 关联时,允许通过 s-p.x_request_id(0/1) <=> c-p.x_request_id(0/1) 关联
1858
+ # 此处已确保 auto_instance_id 一致 (即同一个进程)
1859
+
1860
+ # x_req_id 同侧相等: 透传 x_req_id,来自上游
1861
+ # x_req_id 异侧相等: 注入 x_req_id,内部产生
1862
+ x_req_id_matched = False
1863
+ # 同一个进程内时间一定覆盖
1864
+ for same_xreqid_idx in self .leaf_x_request_id .get (
1865
+ x_request_id_0 , []):
1866
+ if sys_span .time_range_cover (self .spans [same_xreqid_idx ]):
1867
+ x_req_id_matched = True
1868
+ if not x_req_id_matched :
1869
+ for same_xreqid_idx in self .leaf_x_request_id .get (
1870
+ x_request_id_1 , []):
1871
+ if sys_span .time_range_cover (self .spans [same_xreqid_idx ]):
1872
+ x_req_id_matched = True
1873
+ if x_req_id_matched :
1874
+ for app_root in self .app_span_roots :
1875
+ if app_root .get_parent_id () < 0 :
1876
+ self .append_sys_span (sys_span )
1877
+ app_root .set_parent (
1878
+ sys_span ,
1879
+ "s-p sys_span mounted due to x_request_id matched c-p" ,
1880
+ self .mounted_callback )
1881
+ return True
1882
+
1833
1883
return False
1834
1884
1835
1885
def _attach_client_sys_span (self , sys_span : SysSpanNode ) -> bool :
0 commit comments