@@ -92,6 +92,26 @@ def clear_stop(self):
92
92
else :
93
93
self .assertTrue (False , "OpTestHMIHandling failed to recover from previous OpSystemState.UNKNOWN_BAD" )
94
94
95
def handle_panic(self):
    '''
    Wait for the kernel panic that follows an unrecoverable HMI and then
    follow the system through its reboot back to the OS.

    The console is first watched (up to 120 seconds) for the panic
    banner; if it shows up, the console is watched for a further 120
    seconds for "ISTEP", which is taken as the sign that the system has
    started to IPL again. Either wait ending in anything other than the
    expected text (timeout or EOF) fails the test.
    '''
    panic_banner = "Kernel panic - not syncing: Unrecoverable HMI exception"

    # A result of 0 means the first entry of the pattern list matched;
    # any other value corresponds to pexpect.TIMEOUT or pexpect.EOF.
    panic_rc = self.cv_SYSTEM.console.pty.expect_no_fail(
        [panic_banner, pexpect.TIMEOUT, pexpect.EOF], timeout=120)

    if panic_rc != 0:
        self.assertTrue(False, "OpTestHMIHandling: No panic after topology recovery failure")
    elif self.cv_SYSTEM.console.pty.expect_no_fail(
            ["ISTEP", pexpect.TIMEOUT, pexpect.EOF], timeout=120) != 0:
        self.assertTrue(False, "OpTestHMIHandling: System failing to reboot after topology recovery failure")
    else:
        # The reboot was not initiated through the state machine, so tell
        # it the system is IPLing before asking it to reach the OS again.
        self.cv_SYSTEM.set_state(OpSystemState.IPLing)
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
106
+
107
def handle_OpalTI(self):
    '''
    Follow the system through the reboot expected after an OPAL TI.

    The console is watched for up to 120 seconds for "ISTEP". When it
    appears the system state is marked as IPLing and driven back to the
    OS; a timeout or EOF instead fails the test.
    '''
    ipl_rc = self.cv_SYSTEM.console.pty.expect_no_fail(
        ["ISTEP", pexpect.TIMEOUT, pexpect.EOF], timeout=120)

    # Only index 0 ("ISTEP") counts as success.
    if ipl_rc != 0:
        self.assertTrue(False, "System failed to reboot after OPAL TI")
    else:
        self.cv_SYSTEM.set_state(OpSystemState.IPLing)
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
114
+
95
115
def handle_ipl (self ):
96
116
rc = self .cv_SYSTEM .console .pty .expect (["ISTEP" , pexpect .TIMEOUT , pexpect .EOF ], timeout = 120 )
97
117
if rc == 0 :
@@ -202,6 +222,54 @@ def form_scom_addr(self, addr, core):
202
222
log .debug (val )
203
223
return val
204
224
225
def is_node_present(self, node):
    '''
    Check whether the given device tree node path exists on the host.

    The system is brought to the OS state and "ls <node>" is run over
    the console.

    node: path to look for, e.g. a path under /proc/device-tree.

    Returns 1 when the ls command succeeds and 0 when it raises
    CommandFailed.
    '''
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    try:
        self.cv_HOST.host_run_command("ls %s" % node, console=1)
    except CommandFailed:
        # ls failed, so the node is treated as not present.
        return 0
    else:
        return 1
236
+
237
def get_OpalSwXstop(self):
    '''
    Query the opal-sw-xstop setting from the ibm,skiboot nvram partition.

    On a fresh system the option isn't set and the nvram command exits
    with exitcode = 255. In that case the processor generation decides
    the value reported:
      On power8 we treat this as enabled.
      On power9 we treat this as disabled.

    Side effect: self.proc_gen is refreshed from the host.

    Returns the output of the nvram command as handed back by
    host_run_command() (presumably a list of output lines -- confirm
    against that helper), or the string "enable" / "disable" when the
    option is unset.

    Fails the test when nvram fails with any other exit code, or when
    the option is unset on a processor generation that has no known
    default.
    '''
    # Reach the OS before issuing any host command over the console,
    # the same order is_node_present() and set_OpalSwXstop() use.
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    self.proc_gen = self.cv_HOST.host_get_proc_gen(console=1)
    try:
        return self.cv_HOST.host_run_command(
            "nvram -p ibm,skiboot --print-config=opal-sw-xstop", console=1)
    except CommandFailed as cf:
        if cf.exitcode != 255:
            self.assertTrue(False, "get_OpalSwXstop() failed to query nvram.")
        elif self.proc_gen in ["POWER8", "POWER8E"]:
            return "enable"
        elif self.proc_gen in ["POWER9"]:
            return "disable"
        else:
            # Previously this path fell through to an unassigned local
            # and raised UnboundLocalError; report the real cause.
            self.assertTrue(
                False,
                "get_OpalSwXstop() has no default opal-sw-xstop value for %s"
                % self.proc_gen)
257
+
258
def set_OpalSwXstop(self, val):
    '''
    Make sure the opal-sw-xstop nvram option is set to val.

    Nothing is written when get_OpalSwXstop() already reports val.
    Otherwise the option is updated in the ibm,skiboot partition and
    read back; the test fails if the read-back does not contain val.

    val: value to store, e.g. "enable" or "disable".
    '''
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    if val in self.get_OpalSwXstop():
        # Already configured as requested.
        return

    self.cv_HOST.host_run_command(
        "nvram -p ibm,skiboot --update-config opal-sw-xstop=%s" % val,
        console=1)

    # Read the setting back to confirm the update took effect.
    if val not in self.get_OpalSwXstop():
        self.assertTrue(False, "Failed to set opal-sw-xstop config to %s" % val)
272
+
205
273
def clearGardEntries (self ):
206
274
self .cv_SYSTEM .goto_state (OpSystemState .OS )
207
275
self .util .PingFunc (self .cv_HOST .ip , BMC_CONST .PING_RETRY_POWERCYCLE )
@@ -287,6 +355,8 @@ def _testHMIHandling(self, i_test):
287
355
self ._testTFMR_Errors (BMC_CONST .TFMR_DEC_PARITY_ERROR )
288
356
self ._testTFMR_Errors (BMC_CONST .TFMR_PURR_PARITY_ERROR )
289
357
self ._testTFMR_Errors (BMC_CONST .TFMR_SPURR_PARITY_ERROR )
358
+ elif l_test == BMC_CONST .HMI_TOD_TOPOLOGY_FAILOVER :
359
+ self ._test_tod_topology_failover ()
290
360
else :
291
361
raise Exception ("Please provide valid test case" )
292
362
l_con .run_command ("dmesg -C" )
@@ -407,6 +477,43 @@ def _test_malfunction_alert(self):
407
477
console .pty .sendline (l_cmd )
408
478
self .handle_ipl ()
409
479
480
def _test_tod_topology_failover(self):
    '''
    Exercise the error path for HMI TOD topology failover.

    When HMI recovery fails, TOD/TB goes into an invalid state and stops
    running. The kernel is then expected to either
        a) panic and reboot cleanly (opal-sw-xstop=disable), or
        b) cause an OPAL TI by triggering a sw checkstop to OCC
           (opal-sw-xstop=enable).

    Neither case should leave the Linux OS hung. The error is simulated
    by injecting a TOD topology failover on every chip in self.l_dic
    with a single console command line, after which the handler that
    matches the current opal-sw-xstop setting takes over.
    '''
    scom_reg = "0x40000"
    inject_value = "0x4000000000000000"

    # With opal-sw-xstop disabled a panic is expected, otherwise a TI.
    expect_panic = "disable" in self.get_OpalSwXstop()

    console = self.cv_SYSTEM.console
    # One putscom per chip, chained so they are all sent in one line.
    inject_cmd = "".join(
        "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s; "
        % (chip_entry[0], scom_reg, inject_value)
        for chip_entry in self.l_dic)

    console.pty.sendline(inject_cmd)
    if expect_panic:
        self.handle_panic()
    else:
        self.handle_OpalTI()
516
+
410
517
def _test_hyp_resource_err (self ):
411
518
'''
412
519
This function is used to test HMI: Hypervisor resource error
@@ -563,6 +670,20 @@ def runTest(self):
563
670
self ._testHMIHandling (BMC_CONST .HMI_MALFUNCTION_ALERT )
564
671
self .clearGardEntries ()
565
672
673
class TodTopologyFailoverPanic(OpTestHMIHandling):
    '''TOD topology failover test run with opal-sw-xstop set to disable.'''

    def runTest(self):
        '''Disable opal-sw-xstop, then run the TOD topology failover test.'''
        self.set_OpalSwXstop("disable")
        failover_test = BMC_CONST.HMI_TOD_TOPOLOGY_FAILOVER
        self._testHMIHandling(failover_test)
677
+
678
class TodTopologyFailoverOpalTI(OpTestHMIHandling):
    '''TOD topology failover test run with opal-sw-xstop set to enable.'''

    def runTest(self):
        '''
        Enable opal-sw-xstop and run the TOD topology failover test.

        The test is skipped when the ibm,sw-checkstop-fir device tree
        node is not present on the host.
        '''
        sw_checkstop_node = "/proc/device-tree/ibm,sw-checkstop-fir"
        if self.is_node_present(sw_checkstop_node) != 1:
            self.skipTest("OPAL TI not supported on this system.")
        else:
            self.set_OpalSwXstop("enable")
            self._testHMIHandling(BMC_CONST.HMI_TOD_TOPOLOGY_FAILOVER)
686
+
566
687
class HypervisorResourceError (OpTestHMIHandling ):
567
688
def runTest (self ):
568
689
self ._testHMIHandling (BMC_CONST .HMI_HYPERVISOR_RESOURCE_ERROR )
@@ -576,6 +697,8 @@ def unrecoverable_suite():
576
697
s = unittest .TestSuite ()
577
698
s .addTest (MalfunctionAlert ())
578
699
s .addTest (HypervisorResourceError ())
700
+ s .addTest (TodTopologyFailoverPanic ())
701
+ s .addTest (TodTopologyFailoverOpalTI ())
579
702
s .addTest (ClearGard ())
580
703
return s
581
704
0 commit comments