Skip to content

Commit 20612c4

Browse files
apricote and jooola authored
refactor: new dev/testing environment (#671)
- Add a new development environment based on [github.com/hetznercloud/kubernetes-dev-env](https://github.com/hetznercloud/kubernetes-dev-env)
- Refactor the Robot test server to be re-installed once a week and use `overlayrootfs` for clean disk on every run

---------

Co-authored-by: Jonas Lammler <[email protected]>
1 parent 8656812 commit 20612c4

27 files changed

+698
-614
lines changed

.github/workflows/robot.yml

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
name: Robot
2+
3+
on:
4+
push:
5+
branches: [main]
6+
paths:
7+
- .github/workflows/robot.yml
8+
- dev/robot/**
9+
schedule:
10+
# https://crontab.guru/#30_12_*_*_1
11+
- cron: "30 12 * * 1"
12+
13+
jobs:
14+
install:
15+
runs-on: ubuntu-latest
16+
17+
concurrency: robot
18+
environment: e2e-robot
19+
20+
defaults:
21+
run:
22+
working-directory: dev/robot
23+
24+
env:
25+
PY_COLORS: true
26+
ANSIBLE_FORCE_COLOR: true
27+
28+
steps:
29+
- uses: actions/checkout@v4
30+
31+
- name: Install robot server
32+
env:
33+
ROBOT_USER: ${{ secrets.ROBOT_USER }}
34+
ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }}
35+
ROBOT_SSH_KEY: ${{ secrets.ROBOT_SSH_KEY }}
36+
run: |
37+
ansible-galaxy install -r requirements.yml
38+
39+
dev/robot/with-ssh-agent ansible-playbook -vv install.yml

.github/workflows/test_e2e.yml

Lines changed: 82 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -5,20 +5,28 @@ on:
55
branches: [main]
66
jobs:
77
cloud:
8-
name: Cloud ${{ matrix.k3s }}
8+
name: cloud ${{ matrix.k3s }}
9+
runs-on: ubuntu-latest
10+
911
permissions:
1012
id-token: write
11-
runs-on: ubuntu-latest
13+
14+
concurrency:
15+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.k3s }}
16+
cancel-in-progress: true
17+
1218
strategy:
19+
fail-fast: false # Continue tests matrix if a flaky run occurs.
1320
matrix:
14-
# All k3s after January 2024 break our e2e tests, we hardcode
15-
# the versions for now until we can fix the source of this.
16-
k3s: [ v1.26.12+k3s1, v1.27.9+k3s1, v1.28.5+k3s1, v1.29.0+k3s1 ]
17-
fail-fast: false
21+
k3s:
22+
- v1.26
23+
- v1.27
24+
- v1.28
25+
- v1.29
1826

1927
env:
20-
K3S_VERSION: ${{ matrix.k3s }}
21-
SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }}
28+
K3S_CHANNEL: ${{ matrix.k3s }}
29+
ENV: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }}
2230

2331
# Domain must be available in the account running the tests. This domain is available in the account
2432
# running the public integration tests.
@@ -33,107 +41,101 @@ jobs:
3341

3442
- uses: hetznercloud/setup-hcloud@v1
3543

36-
- uses: hetznercloud/tps-action@main
44+
- uses: opentofu/setup-opentofu@v1
3745
with:
38-
token: ${{ secrets.HCLOUD_TOKEN }}
46+
tofu_version: v1.7.2 # renovate: datasource=github-releases depName=opentofu/opentofu
47+
tofu_wrapper: false
3948

4049
- uses: yokawasa/[email protected]
4150
with:
4251
setup-tools: |
4352
helm
4453
kubectl
4554
skaffold
46-
helm: v3.15.1
47-
kubectl: v1.29.0
48-
skaffold: v2.12.0
55+
helm: v3.15.2 # renovate: datasource=github-releases depName=helm/helm
56+
kubectl: v1.29.6 # renovate: datasource=github-releases depName=kubernetes/kubernetes
57+
skaffold: v2.12.0 # renovate: datasource=github-releases depName=GoogleContainerTools/skaffold
58+
4959
- name: Install k3sup
5060
run: |
5161
curl -sLS https://get.k3sup.dev | sh
5262
53-
- name: Setup test environment
54-
run: |
55-
source <(hack/dev-up.sh)
56-
57-
# make exported env variables available to following jobs
58-
echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV"
59-
echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV"
60-
echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV"
61-
62-
- name: Build and Deploy HCCM
63+
- uses: hetznercloud/tps-action@main
64+
with:
65+
token: ${{ secrets.HCLOUD_TOKEN }}
66+
67+
- name: Setup environment
68+
run: make -C dev up
69+
70+
- name: Run skaffold
6371
run: |
64-
skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}"
65-
tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}")
66-
skaffold deploy --images=hetznercloud/hcloud-cloud-controller-manager=$tag
72+
source dev/files/env.sh
73+
skaffold run
6774
6875
- name: Run tests
6976
run: |
77+
source dev/files/env.sh
7078
go test ./tests/e2e -tags e2e -v -race -timeout 60m -coverprofile=coverage.txt
7179
7280
- name: Upload coverage reports to Codecov
7381
uses: codecov/codecov-action@v4
7482
with:
7583
token: ${{ secrets.CODECOV_TOKEN }}
7684

77-
- name: Download logs & events
85+
- name: Dump logs & events
7886
if: always()
7987
continue-on-error: true
8088
run: |
89+
source dev/files/env.sh
8190
mkdir debug-logs
91+
92+
echo "::group::hccm.log"
8293
kubectl logs \
8394
--namespace kube-system \
8495
--selector app.kubernetes.io/name=hcloud-cloud-controller-manager \
8596
--all-containers \
8697
--prefix=true \
8798
--tail=-1 \
88-
> debug-logs/hccm.log
89-
99+
| tee debug-logs/hccm.log
100+
echo "::endgroup::"
101+
102+
echo "::group::events.yaml"
90103
kubectl get events \
91104
--all-namespaces \
92105
--sort-by=.firstTimestamp \
93106
--output yaml \
94-
> debug-logs/events.yaml
95-
96-
- name: Show HCCM Logs on Failure
97-
if: failure()
98-
continue-on-error: true
99-
run: |
100-
echo "::group::hccm.log"
101-
cat debug-logs/hccm.log
107+
| tee debug-logs/events.yaml
102108
echo "::endgroup::"
103109
104-
- name: Cleanup test environment
110+
- name: Cleanup
105111
if: always()
106112
continue-on-error: true
107-
run: |
108-
hack/dev-down.sh
113+
run: make -C dev down
109114

110115
- name: Persist debug artifacts
111116
if: always()
112117
continue-on-error: true
113118
uses: actions/upload-artifact@v4
114119
with:
115-
name: debug-logs-${{ env.SCOPE }}
120+
name: debug-logs-${{ env.ENV }}
116121
path: debug-logs/
117122

118123
robot:
119-
name: Robot
124+
runs-on: ubuntu-latest
125+
120126
permissions:
121127
id-token: write
122128

123-
# Make sure that only one Job is using the server at a time
124-
concurrency: robot-test-server
129+
# Make sure that only one job is using the server at a time
130+
concurrency: robot
125131
environment: e2e-robot
126132

127133
env:
128-
K3S_VERSION: v1.29.0+k3s1
129-
SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot
134+
K3S_CHANNEL: v1.29
135+
ENV: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot
130136

131-
# Disable routes in dev-env, not supported for Robot.
132-
ROUTES_ENABLED: "false"
133137
ROBOT_ENABLED: "true"
134-
SERVER_NUMBER: ${{ vars.SERVER_NUMBER }}
135138

136-
runs-on: ubuntu-latest
137139
steps:
138140
- uses: actions/checkout@v4
139141

@@ -143,102 +145,84 @@ jobs:
143145

144146
- uses: hetznercloud/setup-hcloud@v1
145147

146-
- uses: hetznercloud/tps-action@main
148+
- uses: opentofu/setup-opentofu@v1
147149
with:
148-
token: ${{ secrets.HCLOUD_TOKEN }}
150+
tofu_version: v1.7.2 # renovate: datasource=github-releases depName=opentofu/opentofu
151+
tofu_wrapper: false
149152

150153
- uses: yokawasa/[email protected]
151154
with:
152155
setup-tools: |
153156
helm
154157
kubectl
155158
skaffold
156-
helm: v3.15.1
157-
kubectl: v1.29.0
158-
skaffold: v2.12.0
159+
helm: v3.15.2 # renovate: datasource=github-releases depName=helm/helm
160+
kubectl: v1.29.6 # renovate: datasource=github-releases depName=kubernetes/kubernetes
161+
skaffold: v2.12.0 # renovate: datasource=github-releases depName=GoogleContainerTools/skaffold
162+
159163
- name: Install k3sup
160164
run: |
161165
curl -sLS https://get.k3sup.dev | sh
162166
163-
- name: Setup test environment
164-
env:
165-
ROBOT_USER: ${{ secrets.ROBOT_USER }}
166-
ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }}
167-
run: |
168-
source <(hack/dev-up.sh)
169-
170-
# make exported env variables available to following jobs
171-
echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV"
172-
echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV"
173-
echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV"
174-
175-
- name: Build and Deploy HCCM
176-
run: |
177-
skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}"
178-
tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}")
179-
skaffold deploy \
180-
--profile=robot \
181-
--images=hetznercloud/hcloud-cloud-controller-manager=$tag
167+
- uses: hetznercloud/tps-action@main
168+
with:
169+
token: ${{ secrets.HCLOUD_TOKEN }}
182170

183-
- name: Setup Robot Server
171+
- name: Setup environment
184172
env:
173+
ROBOT_SSH_KEY: ${{ secrets.ROBOT_SSH_KEY }}
185174
ROBOT_USER: ${{ secrets.ROBOT_USER }}
186175
ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }}
176+
run: |
177+
dev/robot/with-ssh-agent make -C dev up
187178
188-
# Nicer output
189-
PY_COLORS: true
190-
ANSIBLE_FORCE_COLOR: true
191-
working-directory: hack/robot-e2e
179+
- name: Run skaffold
192180
run: |
193-
ansible-galaxy install -r requirements.yml
194-
echo "::group::ansible-playbook e2e-setup-robot-server.yml"
195-
ansible-playbook e2e-setup-robot-server.yml -e scope=$SCOPE -e server_number=$SERVER_NUMBER -vvv
196-
echo "::endgroup::"
181+
source dev/files/env.sh
182+
skaffold run --profile=robot
197183
198184
- name: Run tests
199185
env:
200186
ROBOT_USER: ${{ secrets.ROBOT_USER }}
201187
ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }}
202188
run: |
189+
source dev/files/env.sh
203190
go test ./tests/e2e -tags e2e,robot -v -timeout 60m
204191
205-
- name: Download logs & events
192+
- name: Dump logs & events
206193
if: always()
207194
continue-on-error: true
208195
run: |
196+
source dev/files/env.sh
209197
mkdir debug-logs
198+
199+
echo "::group::hccm.log"
210200
kubectl logs \
211201
--namespace kube-system \
212202
--selector app.kubernetes.io/name=hcloud-cloud-controller-manager \
213203
--all-containers \
214204
--prefix=true \
215205
--tail=-1 \
216-
> debug-logs/hccm.log
217-
206+
| tee debug-logs/hccm.log
207+
echo "::endgroup::"
208+
209+
echo "::group::events.yaml"
218210
kubectl get events \
219211
--all-namespaces \
220212
--sort-by=.firstTimestamp \
221213
--output yaml \
222-
> debug-logs/events.yaml
223-
224-
- name: Show HCCM Logs on Failure
225-
if: failure()
226-
continue-on-error: true
227-
run: |
228-
echo "::group::hccm.log"
229-
cat debug-logs/hccm.log
214+
| tee debug-logs/events.yaml
230215
echo "::endgroup::"
231216
232-
- name: Cleanup test environment
217+
- name: Cleanup
233218
if: always()
234219
continue-on-error: true
235-
run: |
236-
hack/dev-down.sh
220+
run: make -C dev down
237221

238222
- name: Persist debug artifacts
239223
if: always()
240224
continue-on-error: true
241225
uses: actions/upload-artifact@v4
242226
with:
243-
name: debug-logs-${{ env.SCOPE }}
227+
name: debug-logs-${{ env.ENV }}
244228
path: debug-logs/

0 commit comments

Comments
 (0)