Skip to content

Commit 5340989

Browse files
authored
e2e: qa test - route intra no-exchange traffic over the internet (#1999)
1 parent e1adb95 commit 5340989

File tree

1 file changed

+121
-37
lines changed

1 file changed

+121
-37
lines changed

e2e/qa_test.go

Lines changed: 121 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,11 @@ var (
4646
)
4747

4848
type Device struct {
49-
PubKey string
50-
Code string
51-
MaxUsers int
52-
UsersCount int
49+
PubKey string
50+
Code string
51+
ExchangeCode string
52+
MaxUsers int
53+
UsersCount int
5354
}
5455

5556
type DeviceTestResult struct {
@@ -81,12 +82,20 @@ func TestMain(m *testing.M) {
8182
log.Fatalf("failed to get program data: %v", err)
8283
}
8384

85+
// Create a map of exchange pubkeys to codes for lookup
86+
exchangeMap := make(map[[32]uint8]string)
87+
for _, e := range data.Exchanges {
88+
exchangeMap[e.PubKey] = e.Code
89+
}
90+
8491
for _, d := range data.Devices {
92+
exchangeCode := exchangeMap[d.ExchangePubKey]
8593
dev := &Device{
86-
PubKey: base58.Encode(d.PubKey[:]),
87-
Code: d.Code,
88-
MaxUsers: int(d.MaxUsers),
89-
UsersCount: int(d.UsersCount),
94+
PubKey: base58.Encode(d.PubKey[:]),
95+
Code: d.Code,
96+
ExchangeCode: exchangeCode,
97+
MaxUsers: int(d.MaxUsers),
98+
UsersCount: int(d.UsersCount),
9099
}
91100
devices = append(devices, dev)
92101
}
@@ -479,7 +488,7 @@ func waitForUserDeletion(t *testing.T, ctx context.Context, client pb.QAAgentSer
479488
}
480489
}
481490

482-
t.Logf("User with IP %s does not exist onchain", clientIP)
491+
t.Logf("Waiting for user with IP %s to be deleted onchain", clientIP)
483492
return true, nil
484493
}
485494

@@ -657,10 +666,10 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
657666
if len(devices) == 0 {
658667
// QA mode: Connect all hosts without device specification
659668
t.Log("Running in QA mode - connecting all hosts without device specification")
660-
hostIPMap, err := connectHosts(t, hosts, nil)
669+
hostIPMap, hostDeviceMap, err := connectHosts(t, hosts, nil)
661670
require.NoError(t, err, "Failed to connect hosts")
662671

663-
err = testAllToAllConnectivity(t, hostIPMap, false) // false = use simple ping
672+
err = testAllToAllConnectivity(t, hostIPMap, hostDeviceMap, false) // false = use simple ping
664673
require.NoError(t, err, "Connectivity test failed")
665674
return
666675
}
@@ -674,6 +683,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
674683

675684
// Connect first host to first working device
676685
var firstHostIP string
686+
var firstHostDevice *Device
677687

678688
for i, device := range devices {
679689
if i > 0 {
@@ -682,7 +692,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
682692
}
683693

684694
t.Logf("Attempting to connect %s to device %s", firstHost, device.Code)
685-
hostIPMap, err := connectHosts(t, []string{firstHost}, device)
695+
hostIPMap, _, err := connectHosts(t, []string{firstHost}, device)
686696
if err != nil {
687697
t.Logf("Failed to connect to device %s: %v", device.Code, err)
688698
// Try to disconnect to clean up
@@ -693,6 +703,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
693703
}
694704

695705
firstHostIP = hostIPMap[firstHost]
706+
firstHostDevice = device
696707
t.Logf("First host %s successfully connected to device %s with IP %s",
697708
firstHost, device.Code, firstHostIP)
698709
break
@@ -716,7 +727,7 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
716727
device := device // capture loop variable
717728
t.Run(fmt.Sprintf("device_%s", device.Code), func(t *testing.T) {
718729
t.Logf("Testing device %s %d/%d", device.Code, i+1, len(devices))
719-
result := testDeviceConnectivity(t, device, remainingHosts, firstHost, firstHostIP)
730+
result := testDeviceConnectivity(t, device, remainingHosts, firstHost, firstHostIP, firstHostDevice)
720731

721732
resultsMutex.Lock()
722733
results = append(results, result)
@@ -734,14 +745,16 @@ func runUnicastConnectivityTest(t *testing.T, hosts []string, devices []*Device)
734745
}
735746

736747
// connectHosts connects the specified hosts, optionally to a specific device
737-
func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]string, error) {
748+
// Returns maps of host->IP and host->Device
749+
func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]string, map[string]*Device, error) {
738750
hostIPMap := make(map[string]string)
751+
hostDeviceMap := make(map[string]*Device)
739752
ctx := context.Background()
740753

741754
for _, host := range hosts {
742755
client, err := getQAClient(host)
743756
if err != nil {
744-
return nil, fmt.Errorf("failed to create QA client for %s: %w", host, err)
757+
return nil, nil, fmt.Errorf("failed to create QA client for %s: %w", host, err)
745758
}
746759

747760
ensureDisconnected(t, ctx, client, host)
@@ -760,10 +773,10 @@ func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]stri
760773
cancel()
761774

762775
if err != nil {
763-
return nil, fmt.Errorf("failed to connect %s: %w", host, err)
776+
return nil, nil, fmt.Errorf("failed to connect %s: %w", host, err)
764777
}
765778
if !result.GetSuccess() {
766-
return nil, fmt.Errorf("connection failed for %s: %s", host, result.GetOutput())
779+
return nil, nil, fmt.Errorf("connection failed for %s: %s", host, result.GetOutput())
767780
}
768781

769782
// Get IP address
@@ -772,23 +785,35 @@ func connectHosts(t *testing.T, hosts []string, device *Device) (map[string]stri
772785
statusCancel()
773786

774787
if err != nil {
775-
return nil, fmt.Errorf("failed to get status for %s: %w", host, err)
788+
return nil, nil, fmt.Errorf("failed to get status for %s: %w", host, err)
776789
}
777790

778791
ip := getIPFromStatus(status)
779792
if ip == "" {
780-
return nil, fmt.Errorf("failed to get IP for %s", host)
793+
return nil, nil, fmt.Errorf("failed to get IP for %s", host)
781794
}
782795

783796
hostIPMap[host] = ip
797+
798+
// If we're connecting to a specific device, store it
799+
if device != nil {
800+
hostDeviceMap[host] = device
801+
} else {
802+
// In QA mode, we need to find which device we connected to for exchange comparison
803+
connectedDevice := findDeviceByHostIP(t, ip)
804+
if connectedDevice != nil {
805+
hostDeviceMap[host] = connectedDevice
806+
}
807+
}
808+
784809
t.Logf("Host %s connected with IP %s", host, ip)
785810
}
786811

787-
return hostIPMap, nil
812+
return hostIPMap, hostDeviceMap, nil
788813
}
789814

790815
// testDeviceConnectivity tests connectivity for a specific device
791-
func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additionalHost string, additionalIP string) *DeviceTestResult {
816+
func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additionalHost string, additionalIP string, additionalHostDevice *Device) *DeviceTestResult {
792817
result := &DeviceTestResult{
793818
Device: device,
794819
Success: true,
@@ -813,18 +838,19 @@ func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additi
813838

814839
// Connect all hosts to this device
815840
t.Logf("Connecting hosts %s to device %s", hosts, device.Code)
816-
hostIPMap, err := connectHosts(t, hosts, device)
841+
hostIPMap, hostDeviceMap, err := connectHosts(t, hosts, device)
817842
if err != nil {
818843
result.Success = false
819844
result.Error = err.Error()
820845
return result
821846
}
822847

823-
// Add the already-connected first host to the map
848+
// Add the already-connected first host to the maps
824849
hostIPMap[additionalHost] = additionalIP
850+
hostDeviceMap[additionalHost] = additionalHostDevice
825851

826852
// Test connectivity between all hosts
827-
err = testAllToAllConnectivity(t, hostIPMap, true) // true = use retry ping
853+
err = testAllToAllConnectivity(t, hostIPMap, hostDeviceMap, true) // true = use retry ping
828854
if err != nil {
829855
result.Success = false
830856
result.Error = err.Error()
@@ -833,7 +859,7 @@ func testDeviceConnectivity(t *testing.T, device *Device, hosts []string, additi
833859
return result
834860
}
835861

836-
func testAllToAllConnectivity(t *testing.T, hostIPMap map[string]string, useRetry bool) error {
862+
func testAllToAllConnectivity(t *testing.T, hostIPMap map[string]string, hostDeviceMap map[string]*Device, useRetry bool) error {
837863
// Build ordered lists for consistent testing
838864
var sortedHosts []string
839865
for host := range hostIPMap {
@@ -857,22 +883,32 @@ func testAllToAllConnectivity(t *testing.T, hostIPMap map[string]string, useRetr
857883

858884
t.Logf("Testing ping from %s (%s) to %s (%s)", sourceHost, sourceIP, targetHost, targetIP)
859885

886+
// Determine if we need to use SourceIface based on exchange comparison
887+
sourceDevice := hostDeviceMap[sourceHost]
888+
targetDevice := hostDeviceMap[targetHost]
889+
useSourceIface := shouldUseSourceIfaceSimple(sourceDevice, targetDevice)
890+
860891
if useRetry {
861892
// Use robust ping with retries for device testing
862893
err := performPingWithRetries(t, client, sourceIP, targetIP,
863-
sourceHost, targetHost, 3)
894+
sourceHost, targetHost, 3, useSourceIface)
864895
if err != nil {
865896
return err
866897
}
867898
} else {
868899
// Use simple ping for basic QA mode
869900
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
870901
pingReq := &pb.PingRequest{
871-
TargetIp: targetIP,
872-
SourceIp: sourceIP,
873-
SourceIface: "doublezero0",
874-
PingType: pb.PingRequest_ICMP,
875-
Timeout: 10,
902+
TargetIp: targetIP,
903+
SourceIp: sourceIP,
904+
PingType: pb.PingRequest_ICMP,
905+
Timeout: 10,
906+
}
907+
if useSourceIface {
908+
pingReq.SourceIface = "doublezero0"
909+
t.Logf("Sending ping request with -I doublezero0 (inter-exchange routing): target=%s, source=%s", targetIP, sourceIP)
910+
} else {
911+
t.Logf("Sending ping request WITHOUT -I doublezero0 (intra-exchange routing): target=%s, source=%s", targetIP, sourceIP)
876912
}
877913
pingResp, err := client.Ping(ctx, pingReq)
878914
cancel()
@@ -906,7 +942,7 @@ func disconnectOnError(client pb.QAAgentServiceClient) {
906942
}
907943

908944
// performPingWithRetries executes a ping test with retry logic
909-
func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, sourceIP, targetIP, sourceName, targetName string, maxRetries int) error {
945+
func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, sourceIP, targetIP, sourceName, targetName string, maxRetries int, useSourceIface bool) error {
910946
var lastErr error
911947

912948
for attempt := 1; attempt <= maxRetries; attempt++ {
@@ -918,13 +954,19 @@ func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, source
918954
pingCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
919955

920956
pingReq := &pb.PingRequest{
921-
TargetIp: targetIP,
922-
SourceIp: sourceIP,
923-
SourceIface: "doublezero0",
924-
PingType: pb.PingRequest_ICMP,
957+
TargetIp: targetIP,
958+
SourceIp: sourceIP,
959+
PingType: pb.PingRequest_ICMP,
960+
}
961+
if useSourceIface {
962+
pingReq.SourceIface = "doublezero0"
925963
}
926964

927-
t.Logf("Attempt %d: Sending ping request: target=%s, source=%s", attempt, targetIP, sourceIP)
965+
if useSourceIface {
966+
t.Logf("Attempt %d: Sending ping request with -I doublezero0 (inter-exchange routing): target=%s, source=%s", attempt, targetIP, sourceIP)
967+
} else {
968+
t.Logf("Attempt %d: Sending ping request WITHOUT -I doublezero0 (intra-exchange routing): target=%s, source=%s", attempt, targetIP, sourceIP)
969+
}
928970
pingResp, err := client.Ping(pingCtx, pingReq)
929971
cancel()
930972

@@ -959,6 +1001,48 @@ func performPingWithRetries(t *testing.T, client pb.QAAgentServiceClient, source
9591001
return lastErr
9601002
}
9611003

1004+
// findDeviceByHostIP finds the device that a host is connected to based on its IP
1005+
func findDeviceByHostIP(t *testing.T, ip string) *Device {
1006+
ctx := context.Background()
1007+
data, err := serviceabilityClient.GetProgramData(ctx)
1008+
if err != nil {
1009+
t.Logf("Warning: Failed to get program data for device lookup: %v", err)
1010+
return nil
1011+
}
1012+
1013+
// Find user by IP
1014+
var user *serviceability.User
1015+
for i := range data.Users {
1016+
u := &data.Users[i]
1017+
userIP := net.IP(u.DzIp[:]).String()
1018+
if userIP == ip {
1019+
user = u
1020+
break
1021+
}
1022+
}
1023+
1024+
if user == nil {
1025+
return nil
1026+
}
1027+
1028+
// Find the device from our global devices list
1029+
for _, device := range devices {
1030+
devicePubKey := base58.Encode(user.DevicePubKey[:])
1031+
if device.PubKey == devicePubKey {
1032+
return device
1033+
}
1034+
}
1035+
1036+
return nil
1037+
}
1038+
1039+
// The intra-exchange routing policy defined in rfc6 dictates that unicast clients that are connected to the
1040+
// same exchange will communicate with each other over the internet instead of doublezero0. If they are
1041+
// connected to the same exchange, `ping -I doublezero0` will fail. This check lets us avoid that.
1042+
func shouldUseSourceIfaceSimple(sourceDevice, targetDevice *Device) bool {
1043+
return sourceDevice.ExchangeCode != targetDevice.ExchangeCode
1044+
}
1045+
9621046
func getIPFromStatus(resp *pb.StatusResponse) string {
9631047
for _, status := range resp.Status {
9641048
if (status.UserType == "IBRL" || status.UserType == "IBRLWithAllocatedIP") && status.DoubleZeroIp != "" {

0 commit comments

Comments
 (0)