Skip to content

Commit 16dd5f5

Browse files
ibmzachdcrowell77
authored andcommitted
PLDM Runtime BMC R/R support: Fix deadlock in runtime PNOR logic (commit 4/n)
This commit fixes two issues in Hostboot runtime code: 1. The constructor of the runtime PNOR component makes several PLDM requests. If an error happens during these PLDM requests, an error is created, which itself makes an attempt to access PNOR through the same RtPnor Singleton instance. This causes the constructor of the Singleton class' RtPnor instance to be re-entered. This is undefined behavior in C++, and in HBRT it results in a deadlock as the thread accessing the Singleton waits on itself to finish initializing the Singleton. A similar cycle exists for the ErrlManager Singleton instance; when creating an error, the ErrlEntry constructor requests a unique ID for itself from the ErrlManager singleton, which initializes it. The constructor accesses PNOR, which can fail, which in turn creates another error, which accesses the ErrlManager again, resulting in a deadlock. This commit fixes these issues by separating the constructors from the initialization of these classes, so that recursive uses don't cause problems. 2. Re-entrant constructors in local static variables previously caused deadlocks. This commit changes the mutex code to assert when it detects that a static variable is already mid-initialization to avert an impending deadlock. Change-Id: I7a61636820776173206865726520626f69696969 RTC: 299918 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/130043 Tested-by: Jenkins OP Build CI <[email protected]> Tested-by: Jenkins Server <[email protected]> Tested-by: FSP CI Jenkins <[email protected]> Reviewed-by: Matt Derksen <[email protected]> Reviewed-by: Isaac Salem <[email protected]> Tested-by: Jenkins Combined Simics CI <[email protected]> Tested-by: Jenkins OP HW <[email protected]> Tested-by: Hostboot CI <[email protected]> Reviewed-by: Daniel M Crowell <[email protected]>
1 parent 7f99da6 commit 16dd5f5

File tree

10 files changed

+199
-44
lines changed

10 files changed

+199
-44
lines changed

src/include/usr/errl/errlmanager.H

+7
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,13 @@ public:
431431
static void setLastIplEid(uint32_t i_eid);
432432
#endif
433433

434+
#ifdef __HOSTBOOT_RUNTIME
435+
/* @brief Set up the ErrlManager instance. This must be called before the
436+
* ErrlManager is used.
437+
*/
438+
void setup();
439+
#endif
440+
434441
protected:
435442
/**
436443
* @brief Destructor

src/include/usr/pnor/pnorif.H

+7
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,13 @@ void readAndClearCounter( uint32_t i_threshold,
324324
const std::array<uint32_t, PNOR::NUM_SECTIONS>& getLidIds();
325325
#endif
326326

327+
#ifdef __HOSTBOOT_RUNTIME
328+
/**
329+
* @brief Whether PNOR is initialized yet.
330+
*/
331+
bool isPnorInitialized();
332+
#endif
333+
327334
} // PNOR
328335

329336
#endif

src/libc++/builtins.C

+37-9
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
/* */
66
/* OpenPOWER HostBoot Project */
77
/* */
8-
/* Contributors Listed Below - COPYRIGHT 2010,2017 */
8+
/* Contributors Listed Below - COPYRIGHT 2010,2022 */
99
/* [+] International Business Machines Corp. */
1010
/* */
1111
/* */
@@ -30,6 +30,11 @@
3030
#include <util/locked/list.H>
3131
#include <sys/sync.h>
3232

33+
#ifdef __HOSTBOOT_RUNTIME
34+
#include <assert.h>
35+
#include <builtins.h>
36+
#endif
37+
3338
void* operator new(size_t s)
3439
{
3540
return malloc(s);
@@ -71,6 +76,19 @@ void operator delete[](void* p, size_t)
7176
}
7277
#endif // bl_builtins_C
7378

79+
enum CXA_GUARD_LOCK_VALUE : uint32_t
80+
{
81+
UNINITIALIZED = 0,
82+
LOCKED = 1,
83+
UNLOCKED_AND_INITIALIZED = 2
84+
};
85+
86+
enum CXA_GUARD_ACTION : int
87+
{
88+
DO_NOT_RUN_CONSTRUCTOR = 0,
89+
RUN_CONSTRUCTOR = 1,
90+
};
91+
7492
extern "C" int __cxa_guard_acquire(volatile uint64_t* gv)
7593
{
7694
// States:
@@ -79,18 +97,30 @@ extern "C" int __cxa_guard_acquire(volatile uint64_t* gv)
7997
// 2 -> unlocked and initialized
8098

8199
uint32_t v = __sync_val_compare_and_swap((volatile uint32_t*)gv, 0, 1);
82-
if (v == 0)
83-
return 1;
84-
if (v == 2)
85-
return 0;
100+
if (v == UNINITIALIZED)
101+
return RUN_CONSTRUCTOR;
102+
if (v == UNLOCKED_AND_INITIALIZED)
103+
return DO_NOT_RUN_CONSTRUCTOR;
104+
105+
#ifdef __HOSTBOOT_RUNTIME
106+
// Hostboot runtime is single-threaded. If we go to initialize a local
107+
// static variable and see that it's already in the "locked" state, it means
108+
// that this thread is already initializing the object and we must have
109+
// entered this path recursively. Trying to acquire the lock again will
110+
// result in a deadlock; so instead, we assert here to fail faster. The C++
111+
// standard specifies that if the initialization of a local static variable
112+
// is entered recursively, the behavior is undefined, so we are permitted to
113+
// do whatever we want here.
114+
crit_assert(false);
115+
#endif
86116

87117
// Wait for peer thread to perform initialization (state 2).
88-
while(2 != *(volatile uint32_t*)gv);
118+
while(UNLOCKED_AND_INITIALIZED != *(volatile uint32_t*)gv);
89119

90120
// Instruction barrier to ensure value is set before later loads execute.
91121
isync();
92122

93-
return 0;
123+
return DO_NOT_RUN_CONSTRUCTOR;
94124
}
95125

96126
extern "C" void __cxa_guard_release(volatile uint64_t* gv)
@@ -176,5 +206,3 @@ extern "C" int __cxa_atexit(void (*i_dtor)(void*),
176206
return 0;
177207
}
178208
#endif // bl_builtins_C
179-
180-

src/libc++/makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#
66
# OpenPOWER HostBoot Project
77
#
8-
# Contributors Listed Below - COPYRIGHT 2010,2019
8+
# Contributors Listed Below - COPYRIGHT 2010,2022
99
# [+] International Business Machines Corp.
1010
#
1111
#
@@ -26,6 +26,9 @@ ROOTPATH = ../..
2626

2727
HOSTBOOT_PROFILE_ARTIFACT=
2828

29+
EXTRAINCDIR = ${ROOTPATH}/src/include/usr
30+
2931
OBJS += builtins.o
32+
OBJS += rt_builtins.o
3033

3134
include ${ROOTPATH}/config.mk

src/libc++/rt_builtins.C

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* IBM_PROLOG_BEGIN_TAG */
2+
/* This is an automatically generated prolog. */
3+
/* */
4+
/* $Source: src/libc++/rt_builtins.C $ */
5+
/* */
6+
/* OpenPOWER HostBoot Project */
7+
/* */
8+
/* Contributors Listed Below - COPYRIGHT 2022 */
9+
/* [+] International Business Machines Corp. */
10+
/* */
11+
/* */
12+
/* Licensed under the Apache License, Version 2.0 (the "License"); */
13+
/* you may not use this file except in compliance with the License. */
14+
/* You may obtain a copy of the License at */
15+
/* */
16+
/* http://www.apache.org/licenses/LICENSE-2.0 */
17+
/* */
18+
/* Unless required by applicable law or agreed to in writing, software */
19+
/* distributed under the License is distributed on an "AS IS" BASIS, */
20+
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
21+
/* implied. See the License for the specific language governing */
22+
/* permissions and limitations under the License. */
23+
/* */
24+
/* IBM_PROLOG_END_TAG */
25+
26+
#define __HOSTBOOT_RUNTIME
27+
28+
#include "builtins.C"

src/usr/errl/errlmanager_common.C

+52-5
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,59 @@ const uint32_t PNOR_ERROR_LENGTH = 4096;
5757
const uint32_t EMPTY_ERRLOG_IN_PNOR = 0xFFFFFFFF;
5858
const uint32_t FIRST_BYTE_ERRLOG = 0xF0000000;
5959

60-
///////////////////////////////////////////////////////////////////////////////
61-
///////////////////////////////////////////////////////////////////////////////
62-
// Global function (not a method on an object) to commit the error log.
63-
void errlCommit(errlHndl_t& io_err, compId_t i_committerComp )
60+
/* @brief Structure to record the arguments to errlCommit, so that it can be
61+
* re-invoked later if called too early.
62+
*
63+
*/
64+
struct errlCommitRecord
65+
{
66+
errlHndl_t errl;
67+
compId_t committerComp;
68+
};
69+
70+
void errlCommit(errlHndl_t& io_err, const compId_t i_committerComp)
6471
{
65-
ERRORLOG::theErrlManager::instance().commitErrLog(io_err, i_committerComp );
72+
do
73+
{
74+
75+
#if defined(__HOSTBOOT_RUNTIME) && defined(CONFIG_FILE_XFER_VIA_PLDM)
76+
// Committing an error at runtime requires PNOR access. If an error log is
77+
// committed before PNOR is available, queue it up to be committed later.
78+
79+
static std::vector<errlCommitRecord> early_error_logs;
80+
81+
if (PNOR::isPnorInitialized())
82+
{ // If PNOR is initialized and we've saved error logs to commit later,
83+
// commit them now.
84+
if (!early_error_logs.empty())
85+
{
86+
// Move the storage to a local variable so that the static vector is
87+
// empty and we don't enter this path multiple times if we commit an
88+
// error in the error commit code. This will also cause the storage
89+
// to be deallocated when we're done here.
90+
const std::vector<errlCommitRecord> recs = move(early_error_logs);
91+
92+
auto& theErrlManager = ERRORLOG::theErrlManager::instance();
93+
94+
for (auto record : recs)
95+
{
96+
theErrlManager.commitErrLog(record.errl, record.committerComp);
97+
}
98+
}
99+
}
100+
else
101+
{ // If PNOR isn't initialized, we can't commit errors yet. Save the error
102+
// for committing later.
103+
early_error_logs.push_back({ io_err, i_committerComp });
104+
io_err = nullptr;
105+
break;
106+
}
107+
#endif
108+
109+
ERRORLOG::theErrlManager::instance().commitErrLog(io_err, i_committerComp);
110+
111+
} while (false);
112+
66113
return;
67114
}
68115

src/usr/errl/runtime/rt_errlmanager.C

+16-9
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ bool rt_processCallout(errlHndl_t &io_errl,
7373
ErrlManager::ErrlManager() :
7474
iv_currLogId(0),
7575
iv_baseNodeId(ERRLOG_PLID_BASE_MASK),
76-
iv_pnorReadyForErrorLogs(true),
76+
iv_pnorReadyForErrorLogs(false),
7777
iv_pStorage(NULL),
7878
iv_hwasProcessCalloutFn(NULL),
7979
iv_pnorAddr(NULL),
@@ -92,7 +92,6 @@ ErrlManager::ErrlManager() :
9292
#endif
9393
{
9494
TRACFCOMP( g_trac_errl, ENTER_MRK "ErrlManager::ErrlManager constructor." );
95-
//Note - There is no PNOR access inside HBRT
9695

9796
iv_hwasProcessCalloutFn = rt_processCallout;
9897

@@ -108,7 +107,8 @@ ErrlManager::ErrlManager() :
108107
"TARGETING is ready..." );
109108

110109
}
111-
if(sys)
110+
111+
if (sys)
112112
{
113113
iv_currLogId = sys->getAttr<TARGETING::ATTR_HOSTSVC_PLID>();
114114
TRACFCOMP( g_trac_errl,"Initial Error Log ID = %.8X", iv_currLogId );
@@ -124,11 +124,6 @@ ErrlManager::ErrlManager() :
124124
// specifically disabled via the attribute ATTR_DISABLE_PLD_WAIT,
125125
// then PLD waits are enabled.
126126
iv_pldWaitEnable = !iv_isFSP && !sys->getAttr<TARGETING::ATTR_DISABLE_PLD_WAIT>();
127-
128-
if(!iv_isFSP)
129-
{
130-
setupPnorInfo();
131-
}
132127
}
133128
else
134129
{
@@ -144,6 +139,18 @@ ErrlManager::ErrlManager() :
144139
TRACFCOMP( g_trac_errl, EXIT_MRK "ErrlManager::ErrlManager constructor." );
145140
}
146141

142+
void ErrlManager::setup()
143+
{
144+
TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::setup" );
145+
146+
if (TARGETING::targetService().isInitialized() && !iv_isFSP)
147+
{
148+
setupPnorInfo();
149+
}
150+
151+
TRACFCOMP( g_trac_errl, EXIT_MRK"ErrlManager::setup" );
152+
}
153+
147154
///////////////////////////////////////////////////////////////////////////////
148155
///////////////////////////////////////////////////////////////////////////////
149156
ErrlManager::~ErrlManager()
@@ -773,7 +780,7 @@ void initErrlManager(void)
773780
{
774781
// Note: rtPnor needs to be setup before this is called
775782
// call errlManager ctor so that we're ready and waiting for errors.
776-
ERRORLOG::theErrlManager::instance();
783+
ERRORLOG::theErrlManager::instance().setup();
777784

778785
// Note: TARGETING needs to be ready before this is called
779786
// Set the FW Release Version

src/usr/pnor/pnor_common.C

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
/* */
66
/* OpenPOWER HostBoot Project */
77
/* */
8-
/* Contributors Listed Below - COPYRIGHT 2014,2021 */
8+
/* Contributors Listed Below - COPYRIGHT 2014,2022 */
99
/* [+] Google Inc. */
1010
/* [+] International Business Machines Corp. */
1111
/* */
@@ -55,7 +55,13 @@
5555

5656
// Trace definition
5757
trace_desc_t* g_trac_pnor = NULL;
58+
59+
#ifndef __HOSTBOOT_RUNTIME
60+
// If this is IPL time code, initialize the trace buffer as normal. If this is
61+
// runtime code, we need to ensure that this is initialized before the RtPnor
62+
// instance, so we explicitly sequence the initialization ourselves in rt_pnor.C
5863
TRAC_INIT(&g_trac_pnor, PNOR_COMP_NAME, 4*KILOBYTE, TRACE::BUFFER_SLOW); //4K
64+
#endif
5965

6066
// Easy macro replace for unit testing
6167
//#define TRACUCOMP(args...) TRACFCOMP(args)

0 commit comments

Comments
 (0)