@@ -1442,8 +1442,11 @@ def __init__(self, group_key: str):
1442
1442
self .app_span_roots : List [SpanNode ] = None
1443
1443
# 用于存放 `app_span` 的所有 leaf
1444
1444
self .app_span_leafs : List [SpanNode ] = None
1445
+ # 记录叶子节点的 syscall_trace_id, 用以匹配 s-p root
1445
1446
self .leaf_syscall_trace_id_request : Set [int ] = set ()
1446
1447
self .leaf_syscall_trace_id_response : Set [int ] = set ()
1448
+ # 记录叶子节点的 x_request_id => index (in self.spans), 用以匹配 s-p root
1449
+ self .leaf_x_request_id : Dict [str , List [int ]] = {}
1447
1450
# 用于显示调用拓扑使用
1448
1451
self .subnet_id = None
1449
1452
self .subnet = None
@@ -1552,10 +1555,23 @@ def append_sys_span(self, sys_span: SysSpanNode):
1552
1555
self ._set_extra_value_for_sys_span (sys_span )
1553
1556
self ._set_auto_service (sys_span )
1554
1557
if sys_span .tap_side == TAP_SIDE_CLIENT_PROCESS :
1555
- self .leaf_syscall_trace_id_request .add (
1556
- sys_span .get_syscall_trace_id_request ())
1557
- self .leaf_syscall_trace_id_response .add (
1558
- sys_span .get_syscall_trace_id_response ())
1558
+ cp_syscall_trace_id_req = sys_span .get_syscall_trace_id_request ()
1559
+ cp_syscall_trace_id_res = sys_span .get_syscall_trace_id_response ()
1560
+ cp_x_request_id_0 = sys_span .get_x_request_id_0 ()
1561
+ cp_x_request_id_1 = sys_span .get_x_request_id_1 ()
1562
+ if cp_syscall_trace_id_req :
1563
+ self .leaf_syscall_trace_id_request .add (
1564
+ sys_span .get_syscall_trace_id_request ())
1565
+ if cp_syscall_trace_id_res :
1566
+ self .leaf_syscall_trace_id_response .add (
1567
+ sys_span .get_syscall_trace_id_response ())
1568
+ if cp_x_request_id_0 :
1569
+ # index of sys_span = len(self.spans)-1
1570
+ self .leaf_x_request_id .setdefault (
1571
+ cp_x_request_id_0 , []).append (len (self .spans ) - 1 )
1572
+ if cp_x_request_id_1 and cp_x_request_id_1 != cp_x_request_id_0 :
1573
+ self .leaf_x_request_id .setdefault (
1574
+ cp_x_request_id_1 , []).append (len (self .spans ) - 1 )
1559
1575
1560
1576
def remove_server_sys_span (self , sys_span : SysSpanNode ):
1561
1577
# 这里应该要做 append_sys_span 的逆操作(但对象仅为 ServerProcess sys_span)
@@ -1635,7 +1651,7 @@ def split_to_multiple_process_span_set(self) -> list:
1635
1651
# 极端情况下可能会有多个没有 parent_span_id 的入口,这里没法分辨它们的关系,不做拆分
1636
1652
if root_parent_span_id == '' :
1637
1653
root_parent_span_id = "root" # 只是标记 root_parent_span_id,没有实际作用
1638
- if root_parent_span_id not in split_result :
1654
+ if split_result . get ( root_parent_span_id , None ) is None :
1639
1655
newSet = ProcessSpanSet (root_parent_span_id )
1640
1656
newSet .app_span_roots = [self .spans [root_span_index ]]
1641
1657
newSet ._copy_meta_data_from (self )
@@ -1668,7 +1684,12 @@ def attach_sys_span_via_app_span(self, sys_span: SysSpanNode) -> bool:
1668
1684
return self ._attach_client_sys_span (sys_span )
1669
1685
1670
1686
def _attach_server_sys_span (self , sys_span : SysSpanNode ) -> bool :
1687
+ # connection priority: span_id > syscall_trace_id > x_request_id
1671
1688
span_id_of_sys_span = sys_span .get_span_id ()
1689
+ syscall_trace_id_request = sys_span .get_syscall_trace_id_request ()
1690
+ syscall_trace_id_response = sys_span .get_syscall_trace_id_response ()
1691
+ x_request_id_0 = sys_span .get_x_request_id_0 ()
1692
+ x_request_id_1 = sys_span .get_x_request_id_1 ()
1672
1693
if span_id_of_sys_span :
1673
1694
for app_root in self .app_span_roots :
1674
1695
if span_id_of_sys_span == app_root .get_parent_span_id ():
@@ -1707,10 +1728,9 @@ def _attach_server_sys_span(self, sys_span: SysSpanNode) -> bool:
1707
1728
"s-p sys_span mounted due to same span_id as parent"
1708
1729
)
1709
1730
return True
1710
- else :
1711
- syscall_trace_id_request = sys_span .get_syscall_trace_id_request ()
1712
- syscall_trace_id_response = sys_span .get_syscall_trace_id_response (
1713
- )
1731
+
1732
+ # span_id not matched, try syscall_trace_id
1733
+ if syscall_trace_id_request or syscall_trace_id_response :
1714
1734
for app_root in self .app_span_roots :
1715
1735
# 如果 span_id 不存在,说明可能是入口 span,上游没有注入 span_id,此时根据叶子节点 c-p 的 syscall_trace_id 匹配即可
1716
1736
# 这里匹配可以严格点,s-p 和 c-p 只会同侧(req-req / res-res)相等,避免误关联一个独立的 c-p
@@ -1722,6 +1742,36 @@ def _attach_server_sys_span(self, sys_span: SysSpanNode) -> bool:
1722
1742
"s-p sys_span mounted due to syscall_trace_id matched c-p"
1723
1743
)
1724
1744
return True
1745
+
1746
+ # span_id/syscall not matched, try x_request_id
1747
+ if x_request_id_0 or x_request_id_1 :
1748
+ # 场景:过 ingress/nginx 进入服务网关/服务,传递了 x_request_id,且作为首个 span 没有 trace_id/span_id
1749
+ # 且发生跨线程调度,无法基于 syscall 关联时,允许通过 s-p.x_request_id(0/1) <=> c-p.x_request_id(0/1) 关联
1750
+ # 此处已确保 auto_instance_id 一致 (即同一个进程)
1751
+
1752
+ # x_req_id 同侧相等: 透传 x_req_id,来自上游
1753
+ # x_req_id 异侧相等: 注入 x_req_id,内部产生
1754
+ x_req_id_matched = False
1755
+ # 同一个进程内时间一定覆盖
1756
+ for same_xreqid_idx in self .leaf_x_request_id .get (
1757
+ x_request_id_0 , []):
1758
+ if sys_span .time_range_cover (self .spans [same_xreqid_idx ]):
1759
+ x_req_id_matched = True
1760
+ if not x_req_id_matched :
1761
+ for same_xreqid_idx in self .leaf_x_request_id .get (
1762
+ x_request_id_1 , []):
1763
+ if sys_span .time_range_cover (self .spans [same_xreqid_idx ]):
1764
+ x_req_id_matched = True
1765
+ if x_req_id_matched :
1766
+ for app_root in self .app_span_roots :
1767
+ if app_root .get_parent_id () < 0 :
1768
+ self .append_sys_span (sys_span )
1769
+ app_root .set_parent (
1770
+ sys_span ,
1771
+ "s-p sys_span mounted due to x_request_id matched c-p" ,
1772
+ self .mounted_callback )
1773
+ return True
1774
+
1725
1775
return False
1726
1776
1727
1777
def _attach_client_sys_span (self , sys_span : SysSpanNode ) -> bool :
0 commit comments