Skip to content

Commit 05469cb

Browse files
committed
fix: port forward for local interactive tests
1 parent 028d0d5 commit 05469cb

File tree

3 files changed

+52
-17
lines changed

3 files changed

+52
-17
lines changed

.github/workflows/e2e_tests.yaml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,12 @@ jobs:
104104
kubectl create clusterrolebinding sdk-user-localqueue-creator --clusterrole=localqueue-creator --user=sdk-user
105105
kubectl create clusterrole list-secrets --verb=get,list --resource=secrets
106106
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
107-
kubectl create clusterrole pod-creator --verb=get,list --resource=pods
107+
kubectl create clusterrole pod-creator --verb=get,list,watch --resource=pods
108108
kubectl create clusterrolebinding sdk-user-pod-creator --clusterrole=pod-creator --user=sdk-user
109+
kubectl create clusterrole service-reader --verb=get,list,watch --resource=services
110+
kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user
111+
kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward
112+
kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user
109113
kubectl config use-context sdk-user
110114
111115
- name: Run e2e tests
@@ -117,7 +121,7 @@ jobs:
117121
pip install poetry
118122
poetry install --with test,docs
119123
echo "Running e2e tests..."
120-
poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
124+
poetry run pytest -v -s --log-cli-level=INFO ./tests/e2e/local_interactive_sdk_kind_test.py::TestRayLocalInteractiveKind::test_local_interactives_nvidia_gpu > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
121125
env:
122126
GRPC_DNS_RESOLVER: "native"
123127

tests/e2e/local_interactive_sdk_kind_test.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,31 @@
11
from codeflare_sdk import (
22
Cluster,
33
ClusterConfiguration,
4-
TokenAuthentication,
54
generate_cert,
65
)
76

87
import pytest
98
import ray
109
import math
10+
import subprocess
1111

1212
from support import *
1313

1414

1515
@pytest.mark.kind
16-
class TestRayLocalInteractiveOauth:
16+
class TestRayLocalInteractiveKind:
1717
def setup_method(self):
1818
initialize_kubernetes_client(self)
19+
self.port_forward_process = None
20+
21+
def cleanup_port_forward(self):
    """Stop the process stored in ``self.port_forward_process``, if any.

    The method is a no-op when no process is recorded, so it is safe to call
    more than once (for example from both a ``finally`` block and
    ``teardown_method``).

    The process is first asked to exit with ``terminate()`` and given up to
    10 seconds to do so. If it is still running after that, it is killed and
    reaped so that no orphaned child is left behind and no
    ``subprocess.TimeoutExpired`` escapes into test teardown.
    """
    process = self.port_forward_process
    if process is None:
        return

    # Drop the handle before doing anything that can raise, so a failure
    # below never leaves a stale process reference on the test instance.
    self.port_forward_process = None

    process.terminate()
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        # The child ignored the terminate request; force it down and reap it
        # so the forwarded local port is released.
        process.kill()
        process.wait()
1926

2027
def teardown_method(self):
    """Release everything the test created, attempting every cleanup step.

    Runs, in order: ``self.cleanup_port_forward()``, ``delete_namespace(self)``
    and ``delete_kueue_resources(self)``. The steps are chained with
    ``try/finally`` so that an exception in an earlier step does not prevent
    the later ones from running; the exception still propagates afterwards.
    """
    try:
        self.cleanup_port_forward()
    finally:
        # Cluster-side resources must be removed even if stopping the local
        # port-forward process failed, otherwise they leak into later tests.
        try:
            delete_namespace(self)
        finally:
            delete_kueue_resources(self)
2331

@@ -39,6 +47,8 @@ def run_local_interactives(
3947
):
4048
cluster_name = "test-ray-cluster-li"
4149

50+
ray.shutdown()
51+
4252
cluster = Cluster(
4353
ClusterConfiguration(
4454
name=cluster_name,
@@ -49,25 +59,24 @@ def run_local_interactives(
4959
head_memory_requests=2,
5060
head_memory_limits=2,
5161
worker_cpu_requests="500m",
52-
worker_cpu_limits=1,
62+
worker_cpu_limits="500m",
5363
worker_memory_requests=1,
5464
worker_memory_limits=4,
5565
worker_extended_resource_requests={gpu_resource_name: number_of_gpus},
56-
write_to_file=True,
5766
verify_tls=False,
5867
)
5968
)
69+
6070
cluster.up()
71+
6172
cluster.wait_ready()
73+
cluster.status()
6274

6375
generate_cert.generate_tls_cert(cluster_name, self.namespace)
6476
generate_cert.export_env(cluster_name, self.namespace)
6577

6678
print(cluster.local_client_url())
6779

68-
ray.shutdown()
69-
ray.init(address=cluster.local_client_url(), logging_level="DEBUG")
70-
7180
@ray.remote(num_gpus=number_of_gpus / 2)
7281
def heavy_calculation_part(num_iterations):
7382
result = 0.0
@@ -84,10 +93,34 @@ def heavy_calculation(num_iterations):
8493
)
8594
return sum(results)
8695

87-
ref = heavy_calculation.remote(3000)
88-
result = ray.get(ref)
89-
assert result == 1789.4644387076714
90-
ray.cancel(ref)
91-
ray.shutdown()
96+
# Attempt to port forward
97+
try:
98+
local_port = "20001"
99+
ray_client_port = "10001"
100+
101+
port_forward_cmd = [
102+
"kubectl",
103+
"port-forward",
104+
"-n",
105+
self.namespace,
106+
f"svc/{cluster_name}-head-svc",
107+
f"{local_port}:{ray_client_port}",
108+
]
109+
self.port_forward_process = subprocess.Popen(
110+
port_forward_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
111+
)
112+
113+
client_url = f"ray://localhost:{local_port}"
114+
cluster.status()
115+
116+
ray.init(address=client_url, logging_level="INFO")
117+
118+
ref = heavy_calculation.remote(3000)
119+
result = ray.get(ref)
120+
assert result == 1789.4644387076714
121+
ray.cancel(ref)
122+
ray.shutdown()
92123

93-
cluster.down()
124+
cluster.down()
125+
finally:
126+
self.cleanup_port_forward()

tests/e2e/support.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import json
21
import os
32
import random
43
import string
54
import subprocess
65
from codeflare_sdk import get_cluster
76
from kubernetes import client, config
8-
import kubernetes.client
97
from codeflare_sdk.common.kubernetes_cluster.kube_api_helpers import (
108
_kube_api_error_handling,
119
)

0 commit comments

Comments
 (0)