Skip to content

Commit 683600f

Browse files
committed
tests/ignition/kdump: add a remote NFS kdump test
This way we have good coverage of the most-used kdump features. Some context on the NFS kdump configuration: coreos/fedora-coreos-tracker#1729. This was previously merged in [1], then reverted in [2] because the NFS server container was not multi-arch, causing the pipeline to trip on it. It is also not functioning on systemd 256 (so anything f41 and above), see [3]. This requires #3917 for the multi-arch container, and #3921. [1] b10d8dc [2] af1468c [3] rhkdump/kdump-utils#52
1 parent 8bc7a5e commit 683600f

File tree

1 file changed

+158
-25
lines changed

1 file changed

+158
-25
lines changed

mantle/kola/tests/ignition/kdump.go

+158-25
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,63 @@ func init() {
2828
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
2929
Platforms: []string{"qemu"},
3030
})
31+
register.RegisterTest(&register.Test{
32+
Run: kdumpNFSTest,
33+
ClusterSize: 0,
34+
Name: `kdump.crash.nfs`,
35+
Description: "Verifies kdump logs are exported to NFS destination",
36+
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
37+
Platforms: []string{"qemu"},
38+
})
39+
}
40+
41+
// This function tests the remote kdump feature by:
42+
// - making sure kdump is ready
43+
// - crashing the machine
44+
// - monitoring the expected vmcore path
45+
func testRemoteKdump(c cluster.TestCluster, kdump_machine platform.Machine, remote_machine platform.Machine, crash_path string) {
46+
47+
// Wait for kdump to become active
48+
// 3 minutes should be enough to generate the kdump initramfs
49+
err := util.Retry(12, 15*time.Second, func() error {
50+
51+
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
52+
53+
if err != nil {
54+
return err
55+
} else if string(kdump_status) == "inactive" {
56+
return fmt.Errorf("Kdump.service is not ready: %s.", string(kdump_status))
57+
}
58+
return nil
59+
})
60+
if err != nil {
61+
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
62+
}
63+
64+
// crash the kernel
65+
// use systemd-run because directly calling `echo c > ...` will always
66+
// throw an error as the kernel immediately hangs.
67+
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
68+
if err != nil {
69+
c.Fatalf("failed to queue kernel crash: %v", err)
70+
}
71+
72+
// Wait for kdump to create vmcore dump on the remote host
73+
err = util.Retry(8, 10*time.Second, func() error {
74+
75+
// Look for the crash files created on the SSH machine
76+
logs, err := c.SSH(remote_machine, fmt.Sprintf("find %s -type f -name vmcore*", crash_path))
77+
78+
if err != nil {
79+
return fmt.Errorf("failed to search for vmcore: %w", err)
80+
} else if logs == nil {
81+
return fmt.Errorf("No vmcore created on remote host")
82+
}
83+
return nil
84+
})
85+
if err != nil {
86+
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
87+
}
3188
}
3289

3390
// The destination VM for kdump logs
@@ -180,45 +237,121 @@ kernel_arguments:
180237
c.Fatalf("Unable to create test machine: %v", err)
181238
}
182239

183-
// Wait for kdump to become active
184-
// 3 minutes should be enough to generate the kdump initramfs
185-
err = util.Retry(12, 15*time.Second, func() error {
240+
testRemoteKdump(c, kdump_machine, ssh_host.Machine, "/home/core/crash")
241+
}
186242

187-
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
243+
// The destination VM for kdump logs over NFS
244+
type NfsServer struct {
245+
Machine platform.Machine
246+
MachineAddress string
247+
}
188248

189-
if err != nil {
190-
return err
191-
} else if string(kdump_status) == "inactive" {
192-
return fmt.Errorf("kdump.service is not ready: %s", string(kdump_status))
193-
}
194-
return nil
195-
})
249+
func setupNFSMachine(c cluster.TestCluster) NfsServer {
250+
var m platform.Machine
251+
var err error
252+
253+
options := platform.QemuMachineOptions{
254+
HostForwardPorts: []platform.HostForwardPort{
255+
{Service: "ssh", HostPort: 0, GuestPort: 22},
256+
// Kdump NFS option does not allow a custom port
257+
{Service: "nfs", HostPort: 2049, GuestPort: 2049},
258+
},
259+
}
260+
261+
nfs_server_butane := conf.Butane(`variant: fcos
262+
version: 1.5.0
263+
storage:
264+
files:
265+
- path: /etc/containers/systemd/nfs.container
266+
overwrite: true
267+
contents:
268+
inline: |
269+
[Container]
270+
Image=quay.io/coreos-assembler/nfs
271+
Volume=/var/nfs:/export
272+
Network=host
273+
PodmanArgs=--privileged
274+
[Install]
275+
WantedBy=default.target
276+
directories:
277+
- path: /var/nfs/crash`)
278+
279+
// start the machine
280+
switch c := c.Cluster.(type) {
281+
// These cases have to be separated because, when put together in the same case statement,
282+
// the Go compiler no longer checks that the individual types in the case have the
283+
// NewMachineWithQemuOptions function, but rather whether platform.Cluster
284+
// does, which fails.
285+
case *qemu.Cluster:
286+
m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options)
287+
default:
288+
panic("unreachable")
289+
}
196290
if err != nil {
197-
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
291+
c.Fatal(err)
198292
}
199293

200-
// crash the kernel
201-
// use systemd-run because direclty calling `echo c...` will alaways
202-
// throw an error as the kernel immediately hangs.
203-
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
294+
return NfsServer{
295+
Machine: m,
296+
MachineAddress: "10.0.2.2",
297+
}
298+
}
299+
300+
func kdumpNFSTest(c cluster.TestCluster) {
301+
nfs_host := setupNFSMachine(c)
302+
303+
butane := conf.Butane(fmt.Sprintf(`variant: fcos
304+
version: 1.5.0
305+
storage:
306+
files:
307+
- path: /etc/kdump.conf
308+
overwrite: true
309+
contents:
310+
inline: |
311+
nfs %s:/
312+
path /crash
313+
core_collector makedumpfile -l --message-level 1 -d 31
314+
extra_bins /sbin/mount.nfs
315+
extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files
316+
systemd:
317+
units:
318+
- name: kdump.service
319+
enabled: true
320+
dropins:
321+
- name: debug.conf
322+
contents: |
323+
[Service]
324+
Environment="debug=1"
325+
kernel_arguments:
326+
should_exist:
327+
- crashkernel=512M`,
328+
nfs_host.MachineAddress))
329+
330+
opts := platform.MachineOptions{
331+
MinMemory: 2048,
332+
}
333+
334+
kdump_machine, err := c.NewMachineWithOptions(butane, opts)
204335
if err != nil {
205-
c.Fatalf("failed to queue kernel crash: %v", err)
336+
c.Fatalf("Unable to create test machine: %v", err)
206337
}
207338

208-
// Wait for kdump to create vmcore dump on the remote host
209-
err = util.Retry(5, 10*time.Second, func() error {
339+
// Wait for nfs server to become active
340+
// 1 minute should be enough to pull the container image
341+
err = util.Retry(4, 15*time.Second, func() error {
210342

211-
// Look for the crash files created on the SSH machine
212-
logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*")
343+
nfs_status, err := c.SSH(nfs_host.Machine, "systemctl is-active nfs.service")
213344

214345
if err != nil {
215-
return fmt.Errorf("failed to search for vmcore: %w", err)
216-
} else if logs == nil {
217-
return fmt.Errorf("No vmcore created on remote SSH host")
346+
return err
347+
} else if string(nfs_status) == "inactive" {
348+
return fmt.Errorf("nfs.service is not ready: %s.", string(nfs_status))
218349
}
219350
return nil
220351
})
221352
if err != nil {
222-
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
353+
c.Fatalf("Timed out while waiting for nfs.service to be ready: %v", err)
223354
}
355+
356+
testRemoteKdump(c, kdump_machine, nfs_host.Machine, "/var/nfs/crash")
224357
}

0 commit comments

Comments
 (0)