@@ -46,10 +46,11 @@ var (
4646)
4747
4848type Device struct {
49- PubKey string
50- Code string
51- MaxUsers int
52- UsersCount int
49+ PubKey string
50+ Code string
51+ ExchangeCode string
52+ MaxUsers int
53+ UsersCount int
5354}
5455
5556type DeviceTestResult struct {
@@ -81,12 +82,20 @@ func TestMain(m *testing.M) {
8182 log .Fatalf ("failed to get program data: %v" , err )
8283 }
8384
85+ // Create a map of exchange pubkeys to codes for lookup
86+ exchangeMap := make (map [[32 ]uint8 ]string )
87+ for _ , e := range data .Exchanges {
88+ exchangeMap [e .PubKey ] = e .Code
89+ }
90+
8491 for _ , d := range data .Devices {
92+ exchangeCode := exchangeMap [d .ExchangePubKey ]
8593 dev := & Device {
86- PubKey : base58 .Encode (d .PubKey [:]),
87- Code : d .Code ,
88- MaxUsers : int (d .MaxUsers ),
89- UsersCount : int (d .UsersCount ),
94+ PubKey : base58 .Encode (d .PubKey [:]),
95+ Code : d .Code ,
96+ ExchangeCode : exchangeCode ,
97+ MaxUsers : int (d .MaxUsers ),
98+ UsersCount : int (d .UsersCount ),
9099 }
91100 devices = append (devices , dev )
92101 }
@@ -479,7 +488,7 @@ func waitForUserDeletion(t *testing.T, ctx context.Context, client pb.QAAgentSer
479488 }
480489 }
481490
482- t .Logf ("User with IP %s does not exist onchain" , clientIP )
491+ t .Logf ("Waiting for user with IP %s to be deleted onchain" , clientIP )
483492 return true , nil
484493 }
485494
@@ -657,10 +666,10 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
657666 if len (devices ) == 0 {
658667 // QA mode: Connect all hosts without device specification
659668 t .Log ("Running in QA mode - connecting all hosts without device specification" )
660- hostIPMap , err := connectHosts (t , hosts , nil )
669+ hostIPMap , hostDeviceMap , err := connectHosts (t , hosts , nil )
661670 require .NoError (t , err , "Failed to connect hosts" )
662671
663- err = testAllToAllConnectivity (t , hostIPMap , false ) // false = use simple ping
672+ err = testAllToAllConnectivity (t , hostIPMap , hostDeviceMap , false ) // false = use simple ping
664673 require .NoError (t , err , "Connectivity test failed" )
665674 return
666675 }
@@ -674,6 +683,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
674683
675684 // Connect first host to first working device
676685 var firstHostIP string
686+ var firstHostDevice * Device
677687
678688 for i , device := range devices {
679689 if i > 0 {
@@ -682,7 +692,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
682692 }
683693
684694 t .Logf ("Attempting to connect %s to device %s" , firstHost , device .Code )
685- hostIPMap , err := connectHosts (t , []string {firstHost }, device )
695+ hostIPMap , _ , err := connectHosts (t , []string {firstHost }, device )
686696 if err != nil {
687697 t .Logf ("Failed to connect to device %s: %v" , device .Code , err )
688698 // Try to disconnect to clean up
@@ -693,6 +703,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
693703 }
694704
695705 firstHostIP = hostIPMap [firstHost ]
706+ firstHostDevice = device
696707 t .Logf ("First host %s successfully connected to device %s with IP %s" ,
697708 firstHost , device .Code , firstHostIP )
698709 break
@@ -716,7 +727,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
716727 device := device // capture loop variable
717728 t .Run (fmt .Sprintf ("device_%s" , device .Code ), func (t * testing.T ) {
718729 t .Logf ("Testing device %s %d/%d" , device .Code , i + 1 , len (devices ))
719- result := testDeviceConnectivity (t , device , remainingHosts , firstHost , firstHostIP )
730+ result := testDeviceConnectivity (t , device , remainingHosts , firstHost , firstHostIP , firstHostDevice )
720731
721732 resultsMutex .Lock ()
722733 results = append (results , result )
@@ -734,14 +745,16 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
734745}
735746
736747// connectHosts connects the specified hosts, optionally to a specific device
737- func connectHosts (t * testing.T , hosts []string , device * Device ) (map [string ]string , error ) {
748+ // Returns maps of host->IP and host->Device
749+ func connectHosts (t * testing.T , hosts []string , device * Device ) (map [string ]string , map [string ]* Device , error ) {
738750 hostIPMap := make (map [string ]string )
751+ hostDeviceMap := make (map [string ]* Device )
739752 ctx := context .Background ()
740753
741754 for _ , host := range hosts {
742755 client , err := getQAClient (host )
743756 if err != nil {
744- return nil , fmt .Errorf ("failed to create QA client for %s: %w" , host , err )
757+ return nil , nil , fmt .Errorf ("failed to create QA client for %s: %w" , host , err )
745758 }
746759
747760 ensureDisconnected (t , ctx , client , host )
@@ -760,10 +773,10 @@ func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]stri
760773 cancel ()
761774
762775 if err != nil {
763- return nil , fmt .Errorf ("failed to connect %s: %w" , host , err )
776+ return nil , nil , fmt .Errorf ("failed to connect %s: %w" , host , err )
764777 }
765778 if ! result .GetSuccess () {
766- return nil , fmt .Errorf ("connection failed for %s: %s" , host , result .GetOutput ())
779+ return nil , nil , fmt .Errorf ("connection failed for %s: %s" , host , result .GetOutput ())
767780 }
768781
769782 // Get IP address
@@ -772,23 +785,35 @@ func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]stri
772785 statusCancel ()
773786
774787 if err != nil {
775- return nil , fmt .Errorf ("failed to get status for %s: %w" , host , err )
788+ return nil , nil , fmt .Errorf ("failed to get status for %s: %w" , host , err )
776789 }
777790
778791 ip := getIPFromStatus (status )
779792 if ip == "" {
780- return nil , fmt .Errorf ("failed to get IP for %s" , host )
793+ return nil , nil , fmt .Errorf ("failed to get IP for %s" , host )
781794 }
782795
783796 hostIPMap [host ] = ip
797+
798+ // If we're connecting to a specific device, store it
799+ if device != nil {
800+ hostDeviceMap [host ] = device
801+ } else {
802+ // In QA mode, we need to find which device we connected to for exchange comparison
803+ connectedDevice := findDeviceByHostIP (t , ip )
804+ if connectedDevice != nil {
805+ hostDeviceMap [host ] = connectedDevice
806+ }
807+ }
808+
784809 t .Logf ("Host %s connected with IP %s" , host , ip )
785810 }
786811
787- return hostIPMap , nil
812+ return hostIPMap , hostDeviceMap , nil
788813}
789814
790815// testDeviceConnectivity tests connectivity for a specific device
791- func testDeviceConnectivity (t * testing.T , device * Device , hosts []string , additionalHost string , additionalIP string ) * DeviceTestResult {
816+ func testDeviceConnectivity (t * testing.T , device * Device , hosts []string , additionalHost string , additionalIP string , additionalHostDevice * Device ) * DeviceTestResult {
792817 result := & DeviceTestResult {
793818 Device : device ,
794819 Success : true ,
@@ -813,18 +838,19 @@ func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additi
813838
814839 // Connect all hosts to this device
815840 t .Logf ("Connecting hosts %s to device %s" , hosts , device .Code )
816- hostIPMap , err := connectHosts (t , hosts , device )
841+ hostIPMap , hostDeviceMap , err := connectHosts (t , hosts , device )
817842 if err != nil {
818843 result .Success = false
819844 result .Error = err .Error ()
820845 return result
821846 }
822847
823- // Add the already-connected first host to the map
848+ // Add the already-connected first host to the maps
824849 hostIPMap [additionalHost ] = additionalIP
850+ hostDeviceMap [additionalHost ] = additionalHostDevice
825851
826852 // Test connectivity between all hosts
827- err = testAllToAllConnectivity (t , hostIPMap , true ) // true = use retry ping
853+ err = testAllToAllConnectivity (t , hostIPMap , hostDeviceMap , true ) // true = use retry ping
828854 if err != nil {
829855 result .Success = false
830856 result .Error = err .Error ()
@@ -833,7 +859,7 @@ func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additi
833859 return result
834860}
835861
836- func testAllToAllConnectivity (t * testing.T , hostIPMap map [string ]string , useRetry bool ) error {
862+ func testAllToAllConnectivity (t * testing.T , hostIPMap map [string ]string , hostDeviceMap map [ string ] * Device , useRetry bool ) error {
837863 // Build ordered lists for consistent testing
838864 var sortedHosts []string
839865 for host := range hostIPMap {
@@ -857,22 +883,32 @@ func testAllToAllConnectivity(t *testing.T, hostIPMap map[string]string, useRetr
857883
858884 t .Logf ("Testing ping from %s (%s) to %s (%s)" , sourceHost , sourceIP , targetHost , targetIP )
859885
886+ // Determine if we need to use SourceIface based on exchange comparison
887+ sourceDevice := hostDeviceMap [sourceHost ]
888+ targetDevice := hostDeviceMap [targetHost ]
889+ useSourceIface := shouldUseSourceIfaceSimple (sourceDevice , targetDevice )
890+
860891 if useRetry {
861892 // Use robust ping with retries for device testing
862893 err := performPingWithRetries (t , client , sourceIP , targetIP ,
863- sourceHost , targetHost , 3 )
894+ sourceHost , targetHost , 3 , useSourceIface )
864895 if err != nil {
865896 return err
866897 }
867898 } else {
868899 // Use simple ping for basic QA mode
869900 ctx , cancel := context .WithTimeout (context .Background (), 60 * time .Second )
870901 pingReq := & pb.PingRequest {
871- TargetIp : targetIP ,
872- SourceIp : sourceIP ,
873- SourceIface : "doublezero0" ,
874- PingType : pb .PingRequest_ICMP ,
875- Timeout : 10 ,
902+ TargetIp : targetIP ,
903+ SourceIp : sourceIP ,
904+ PingType : pb .PingRequest_ICMP ,
905+ Timeout : 10 ,
906+ }
907+ if useSourceIface {
908+ pingReq .SourceIface = "doublezero0"
909+ t .Logf ("Sending ping request with -I doublezero0 (inter-exchange routing): target=%s, source=%s" , targetIP , sourceIP )
910+ } else {
911+ t .Logf ("Sending ping request WITHOUT -I doublezero0 (intra-exchange routing): target=%s, source=%s" , targetIP , sourceIP )
876912 }
877913 pingResp , err := client .Ping (ctx , pingReq )
878914 cancel ()
@@ -906,7 +942,7 @@ func disconnectOnError(client pb.QAAgentServiceClient) {
906942}
907943
908944// performPingWithRetries executes a ping test with retry logic
909- func performPingWithRetries (t * testing.T , client pb.QAAgentServiceClient , sourceIP , targetIP , sourceName , targetName string , maxRetries int ) error {
945+ func performPingWithRetries (t * testing.T , client pb.QAAgentServiceClient , sourceIP , targetIP , sourceName , targetName string , maxRetries int , useSourceIface bool ) error {
910946 var lastErr error
911947
912948 for attempt := 1 ; attempt <= maxRetries ; attempt ++ {
@@ -918,13 +954,19 @@ func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, source
918954 pingCtx , cancel := context .WithTimeout (context .Background (), 15 * time .Second )
919955
920956 pingReq := & pb.PingRequest {
921- TargetIp : targetIP ,
922- SourceIp : sourceIP ,
923- SourceIface : "doublezero0" ,
924- PingType : pb .PingRequest_ICMP ,
957+ TargetIp : targetIP ,
958+ SourceIp : sourceIP ,
959+ PingType : pb .PingRequest_ICMP ,
960+ }
961+ if useSourceIface {
962+ pingReq .SourceIface = "doublezero0"
925963 }
926964
927- t .Logf ("Attempt %d: Sending ping request: target=%s, source=%s" , attempt , targetIP , sourceIP )
965+ if useSourceIface {
966+ t .Logf ("Attempt %d: Sending ping request with -I doublezero0 (inter-exchange routing): target=%s, source=%s" , attempt , targetIP , sourceIP )
967+ } else {
968+ t .Logf ("Attempt %d: Sending ping request WITHOUT -I doublezero0 (intra-exchange routing): target=%s, source=%s" , attempt , targetIP , sourceIP )
969+ }
928970 pingResp , err := client .Ping (pingCtx , pingReq )
929971 cancel ()
930972
@@ -959,6 +1001,48 @@ func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, source
9591001 return lastErr
9601002}
9611003
1004+ // findDeviceByHostIP finds the device that a host is connected to based on its IP
1005+ func findDeviceByHostIP (t * testing.T , ip string ) * Device {
1006+ ctx := context .Background ()
1007+ data , err := serviceabilityClient .GetProgramData (ctx )
1008+ if err != nil {
1009+ t .Logf ("Warning: Failed to get program data for device lookup: %v" , err )
1010+ return nil
1011+ }
1012+
1013+ // Find user by IP
1014+ var user * serviceability.User
1015+ for i := range data .Users {
1016+ u := & data .Users [i ]
1017+ userIP := net .IP (u .DzIp [:]).String ()
1018+ if userIP == ip {
1019+ user = u
1020+ break
1021+ }
1022+ }
1023+
1024+ if user == nil {
1025+ return nil
1026+ }
1027+
1028+ // Find the device from our global devices list
1029+ for _ , device := range devices {
1030+ devicePubKey := base58 .Encode (user .DevicePubKey [:])
1031+ if device .PubKey == devicePubKey {
1032+ return device
1033+ }
1034+ }
1035+
1036+ return nil
1037+ }
1038+
1039+ // The intra-exchange routing policy defined in rfc6 dictates that unicast clients that are connected to the
1040+ // same exchange will communicate with each other over the internet instead of doublezero0. If they are
1041+ // connected to the same exchange, `ping -I doublezero0` will fail. This check lets us avoid that.
1042+ func shouldUseSourceIfaceSimple (sourceDevice , targetDevice * Device ) bool {
1043+ return sourceDevice .ExchangeCode != targetDevice .ExchangeCode
1044+ }
1045+
9621046func getIPFromStatus (resp * pb.StatusResponse ) string {
9631047 for _ , status := range resp .Status {
9641048 if (status .UserType == "IBRL" || status .UserType == "IBRLWithAllocatedIP" ) && status .DoubleZeroIp != "" {
0 commit comments