@@ -106,6 +106,26 @@ def clear_stop(self):
106
106
else :
107
107
self .assertTrue (False , "OpTestHMIHandling failed to recover from previous OpSystemState.UNKNOWN_BAD" )
108
108
109
def handle_panic(self):
    '''
    Wait for the kernel panic raised on an unrecoverable HMI, then for
    the reboot that must follow it, and bring the system back to the OS.

    The test is failed when the panic message is not matched on the
    console (timeout=120), or when no IPL progress marker is matched
    afterwards (timeout=120).
    '''
    pty = self.cv_SYSTEM.console.pty

    # NOTE(review): expect_no_fail() is assumed to return the index of the
    # matched pattern, like pexpect's expect() - confirm against the helper.
    rc = pty.expect_no_fail(["Kernel panic - not syncing: Unrecoverable HMI exception", pexpect.TIMEOUT, pexpect.EOF], timeout=120)
    if rc != 0:
        self.fail("OpTestHMIHandling: No panic after topology recovery failure")

    # Accept both spellings of the IPL marker, exactly as handle_ipl() does,
    # so a lowercase "istep" is not misreported as a failed reboot.
    ipl_markers = ["ISTEP", "istep"]
    rc = pty.expect_no_fail(ipl_markers + [pexpect.TIMEOUT, pexpect.EOF], timeout=120)
    if rc >= len(ipl_markers):
        # Anything past the markers is TIMEOUT or EOF.
        self.fail("OpTestHMIHandling: System failing to reboot after topology recovery failure")

    # The box is rebooting: record that and wait for the OS to come back.
    self.cv_SYSTEM.set_state(OpSystemState.IPLing)
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
120
+
121
def handle_OpalTI(self):
    '''
    Wait for the system to start re-IPLing after an OPAL TI and bring it
    back to the OS.

    The test is failed when no IPL progress marker is matched on the
    console (timeout=120).
    '''
    # Accept both spellings of the IPL marker, exactly as handle_ipl() does,
    # so a lowercase "istep" is not misreported as a failed reboot.
    ipl_markers = ["ISTEP", "istep"]

    # NOTE(review): expect_no_fail() is assumed to return the index of the
    # matched pattern, like pexpect's expect() - confirm against the helper.
    rc = self.cv_SYSTEM.console.pty.expect_no_fail(ipl_markers + [pexpect.TIMEOUT, pexpect.EOF], timeout=120)
    if rc >= len(ipl_markers):
        # Anything past the markers is TIMEOUT or EOF.
        self.fail("System failed to reboot after OPAL TI")

    # The box is rebooting: record that and wait for the OS to come back.
    self.cv_SYSTEM.set_state(OpSystemState.IPLing)
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
128
+
109
129
def handle_ipl (self ):
110
130
rc = self .cv_SYSTEM .console .pty .expect (["ISTEP" , "istep" , pexpect .TIMEOUT , pexpect .EOF ], timeout = 180 )
111
131
log .debug ("before={}" .format (self .cv_SYSTEM .console .pty .before ))
@@ -218,6 +238,54 @@ def form_scom_addr(self, addr, core):
218
238
log .debug (val )
219
239
return val
220
240
241
def is_node_present(self, node):
    '''
    Check whether the given device-tree node path exists on the host.

    Moves the system to the OS state and runs "ls <node>" over the
    console. Returns 1 when the command succeeds and 0 when it raises
    CommandFailed.
    '''
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    try:
        self.cv_HOST.host_run_command("ls %s" % node, console=1)
    except CommandFailed:
        # ls failed, so the node is not present.
        return 0
    else:
        return 1
252
+
253
def get_OpalSwXstop(self):
    '''
    Query the opal-sw-xstop setting from the ibm,skiboot nvram partition.

    Returns whatever host_run_command() returns for the nvram query when
    it succeeds. When nvram exits with code 255 (the option is not set,
    e.g. on a fresh system) the per-generation default is returned as a
    string: "enable" on POWER8/POWER8E, "disable" on POWER9.

    The test is failed for any other nvram exit code, and for an unset
    option on a processor generation with no known default. Previously
    such a path could fall through to "return o" with "o" unbound and
    raise UnboundLocalError.
    '''
    self.proc_gen = self.cv_HOST.host_get_proc_gen(console=1)
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    try:
        return self.cv_HOST.host_run_command("nvram -p ibm,skiboot --print-config=opal-sw-xstop", console=1)
    except CommandFailed as cf:
        if cf.exitcode != 255:
            self.fail("get_OpalSwXstop() failed to query nvram.")

        # Exit code 255: the option is not set in nvram.
        # On POWER8 this is treated as enabled, on POWER9 as disabled.
        if self.proc_gen in ("POWER8", "POWER8E"):
            return "enable"
        if self.proc_gen in ("POWER9",):
            return "disable"

        self.fail("get_OpalSwXstop() failed to query nvram: opal-sw-xstop is "
                  "not set and no default is known for %s" % self.proc_gen)
273
+
274
def set_OpalSwXstop(self, val):
    '''
    Make sure the opal-sw-xstop nvram option is set to val.

    Nothing is written when get_OpalSwXstop() already reports val.
    Otherwise the option is updated through nvram and read back; the
    test is failed when the value read back does not contain val.
    '''
    self.cv_SYSTEM.goto_state(OpSystemState.OS)

    current = self.get_OpalSwXstop()
    if val in current:
        # Already configured as requested.
        return

    update_cmd = "nvram -p ibm,skiboot --update-config opal-sw-xstop=%s" % val
    self.cv_HOST.host_run_command(update_cmd, console=1)

    # Read the option back to confirm that the update took effect.
    updated = self.get_OpalSwXstop()
    if val not in updated:
        self.assertTrue(False, "Failed to set opal-sw-xstop config to %s" % val)
288
+
221
289
def clearGardEntries (self ):
222
290
self .cv_SYSTEM .goto_state (OpSystemState .OS )
223
291
expect_prompt = self .cv_SYSTEM .util .build_prompt ()
@@ -374,6 +442,8 @@ def _testHMIHandling(self, i_test):
374
442
self ._testTFMR_Errors (BMC_CONST .TFMR_DEC_PARITY_ERROR )
375
443
self ._testTFMR_Errors (BMC_CONST .TFMR_PURR_PARITY_ERROR )
376
444
self ._testTFMR_Errors (BMC_CONST .TFMR_SPURR_PARITY_ERROR )
445
+ elif l_test == BMC_CONST .HMI_TOD_TOPOLOGY_FAILOVER :
446
+ self ._test_tod_topology_failover ()
377
447
else :
378
448
raise Exception ("Please provide valid test case" )
379
449
l_con .run_command ("dmesg -C" )
@@ -494,6 +564,43 @@ def _test_malfunction_alert(self):
494
564
console .pty .sendline (l_cmd )
495
565
self .handle_ipl ()
496
566
567
def _test_tod_topology_failover(self):
    '''
    Test the error path for HMI TOD topology failover.

    On HMI recovery failure the TOD/TB goes into an invalid state and
    stops running. In this case the kernel should either
      a) panic, followed by a clean reboot (opal-sw-xstop=disable), OR
      b) cause an OPAL TI by triggering a sw checkstop to the OCC
         (opal-sw-xstop=enable).

    In both cases we should not see any hangs at Linux OS level.
    To simulate the error condition, a TOD topology failover is injected
    on every chip listed in self.l_dic, all in one console command line.

    The test is failed straight away when self.l_dic yields no chips,
    since nothing would be injected.
    '''
    scom_addr = "0x40000"
    l_error = "0x4000000000000000"

    # The expected outcome depends on how opal-sw-xstop is configured.
    l_test_mode = "panic" if "disable" in self.get_OpalSwXstop() else "TI"

    console = self.cv_SYSTEM.console
    # One putscom per chip; the first element of each l_dic entry is the chip.
    l_cmd = "".join(
        "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s; " % (l_pair[0], scom_addr, l_error)
        for l_pair in self.l_dic
    )
    if not l_cmd:
        # Without any chip no error is injected; waiting for a panic or a
        # reboot would only end in a misleading timeout.
        self.fail("OpTestHMIHandling: no chips available to inject TOD topology failover")

    console.pty.sendline(l_cmd)
    if l_test_mode == "panic":
        self.handle_panic()
    else:
        self.handle_OpalTI()
603
+
497
604
def _test_hyp_resource_err (self ):
498
605
'''
499
606
This function is used to test HMI: Hypervisor resource error
@@ -650,6 +757,20 @@ def runTest(self):
650
757
self ._testHMIHandling (BMC_CONST .HMI_MALFUNCTION_ALERT )
651
758
self .clearGardEntries ()
652
759
760
class TodTopologyFailoverPanic(OpTestHMIHandling):
    '''
    TOD topology failover test with opal-sw-xstop set to "disable".
    '''

    def runTest(self):
        # Configure nvram first, then run the failover scenario.
        self.set_OpalSwXstop("disable")
        l_test = BMC_CONST.HMI_TOD_TOPOLOGY_FAILOVER
        self._testHMIHandling(l_test)
764
+
765
class TodTopologyFailoverOpalTI(OpTestHMIHandling):
    '''
    TOD topology failover test with opal-sw-xstop set to "enable".

    Skipped when the ibm,sw-checkstop-fir device-tree node is not
    present on the host.
    '''

    def runTest(self):
        sw_xstop_node = "/proc/device-tree/ibm,sw-checkstop-fir"
        if self.is_node_present(sw_xstop_node) != 1:
            self.skipTest("OPAL TI not supported on this system.")
        else:
            self.set_OpalSwXstop("enable")
            self._testHMIHandling(BMC_CONST.HMI_TOD_TOPOLOGY_FAILOVER)
773
+
653
774
class HypervisorResourceError (OpTestHMIHandling ):
654
775
def runTest (self ):
655
776
self ._testHMIHandling (BMC_CONST .HMI_HYPERVISOR_RESOURCE_ERROR )
@@ -663,6 +784,8 @@ def unrecoverable_suite():
663
784
s = unittest .TestSuite ()
664
785
s .addTest (MalfunctionAlert ())
665
786
s .addTest (HypervisorResourceError ())
787
+ s .addTest (TodTopologyFailoverPanic ())
788
+ s .addTest (TodTopologyFailoverOpalTI ())
666
789
s .addTest (ClearGard ())
667
790
return s
668
791
0 commit comments