Skip to content

Commit ad3e042

Browse files
Checkpoint cleanup while fixing TSan warnings in RMI. (#304)
Signed-off-by: Samuel K. Gutierrez <[email protected]>
1 parent 63fe44c commit ad3e042

10 files changed

+84
-68
lines changed

src/qvi-bbuff.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#include "qvi-utils.h"
1919

2020
void
21-
qvi_bbuff::init(void)
21+
qvi_bbuff::m_init(void)
2222
{
2323
// Make sure we get rid of any data that may be present.
2424
if (m_data) {
@@ -33,7 +33,7 @@ qvi_bbuff::init(void)
3333

3434
qvi_bbuff::qvi_bbuff(void)
3535
{
36-
init();
36+
m_init();
3737
}
3838

3939
qvi_bbuff::qvi_bbuff(
@@ -56,7 +56,7 @@ void
5656
qvi_bbuff::operator=(
5757
const qvi_bbuff &src
5858
) {
59-
init();
59+
m_init();
6060
const int rc = append(src.m_data, src.m_size);
6161
if (qvi_unlikely(rc != QV_SUCCESS)) throw qvi_runtime_error();
6262
}

src/qvi-bbuff.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ struct qvi_bbuff {
3838
/** Pointer to data backing store. */
3939
void *m_data = nullptr;
4040
/** Initializes the instance. */
41-
void init(void);
41+
void m_init(void);
4242
public:
4343
/** Constructor. */
4444
qvi_bbuff(void);

src/qvi-hwloc.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ qvi_hwloc_cpuset_debug(
865865
hwloc_const_cpuset_t cpuset
866866
) {
867867
#if QVI_DEBUG_MODE == 0
868-
QVI_UNUSED(msg);
868+
qvi_unused(msg);
869869
#endif
870870
assert(cpuset);
871871
char *cpusets = nullptr;

src/qvi-hwsplit.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ pool_obtain_cpus_by_cpuset(
6868
);
6969
return (hwrc == 0 ? QV_SUCCESS : QV_ERR_HWLOC);
7070
#endif
71-
QVI_UNUSED(pool);
72-
QVI_UNUSED(request);
71+
qvi_unused(pool);
72+
qvi_unused(request);
7373
return QV_SUCCESS;
7474
}
7575
#endif

src/qvi-macros.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C++; c-basic-offset:4; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2020-2024 Triad National Security, LLC
3+
* Copyright (c) 2020-2025 Triad National Security, LLC
44
* All rights reserved.
55
*
66
* Copyright (c) 2020-2021 Lawrence Livermore National Security, LLC
@@ -32,7 +32,7 @@
3232
*
3333
* @param[in] x Unused variable.
3434
*/
35-
#define QVI_UNUSED(x) \
35+
#define qvi_unused(x) \
3636
do { \
3737
(void)(x); \
3838
} while (0)

src/qvi-map.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ qvi_map_debug_dump(
336336
const qvi_map_t &map
337337
) {
338338
#if QVI_DEBUG_MODE == 0
339-
QVI_UNUSED(map);
339+
qvi_unused(map);
340340
#else
341341
qvi_log_debug(" # nfids_mapped={}", qvi_map_nfids_mapped(map));
342342
for (const auto &mi : map) {

src/qvi-nvml.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ qvi_hwloc_nvml_get_device_cpuset_by_pci_bus_id(
2525
qvi_hwloc_bitmap &cpuset
2626
) {
2727
#ifndef CUDAToolkit_FOUND
28-
QVI_UNUSED(hwl);
29-
QVI_UNUSED(uuid);
30-
QVI_UNUSED(cpuset);
28+
qvi_unused(hwl);
29+
qvi_unused(uuid);
30+
qvi_unused(cpuset);
3131
return QV_ERR_NOT_SUPPORTED;
3232
#else
3333
int rc = QV_SUCCESS;

src/qvi-rmi.cc

+62-48
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
#include "qvi-utils.h"
3535

3636
struct qvi_rmi_msg_header {
37-
qvi_rmi_rpc_funid_t fid = QVI_RMI_FID_INVALID;
37+
qvi_rmi_rpc_fid_t fid = QVI_RMI_FID_INVALID;
3838
char picture[16] = {'\0'};
3939
};
4040

@@ -62,7 +62,7 @@ zwrn_msg(
6262
qvi_log_warn("{} with errno={} ({})", ers, erno, qvi_strerr(erno));
6363
}
6464

65-
static void
65+
static inline void
6666
zsocket_close(
6767
void **sock
6868
) {
@@ -77,7 +77,7 @@ zsocket_close(
7777
*sock = nullptr;
7878
}
7979

80-
static void
80+
static inline void
8181
zctx_destroy(
8282
void **ctx
8383
) {
@@ -92,27 +92,34 @@ zctx_destroy(
9292
*ctx = nullptr;
9393
}
9494

95-
static void *
96-
zsocket_create_and_connect(
95+
static inline void *
96+
zsocket_create(
9797
void *zctx,
98-
int sock_type,
99-
const char *addr
98+
int sock_type
10099
) {
101100
void *zsock = zmq_socket(zctx, sock_type);
102101
if (qvi_unlikely(!zsock)) {
103102
zerr_msg("zmq_socket() failed", errno);
104103
return nullptr;
105104
}
105+
return zsock;
106+
}
107+
108+
static inline int
109+
zsocket_connect(
110+
void *zsock,
111+
const char *addr
112+
) {
106113
const int rc = zmq_connect(zsock, addr);
107114
if (qvi_unlikely(rc != 0)) {
108115
zerr_msg("zmq_connect() failed", errno);
109116
zsocket_close(&zsock);
110-
return nullptr;
117+
return QV_ERR_RPC;
111118
}
112-
return zsock;
119+
return QV_SUCCESS;
113120
}
114121

115-
static void *
122+
static inline void *
116123
zsocket_create_and_bind(
117124
void *zctx,
118125
int sock_type,
@@ -147,7 +154,7 @@ msg_free_byte_buffer_cb(
147154
static inline int
148155
buffer_append_header(
149156
qvi_bbuff *buff,
150-
qvi_rmi_rpc_funid_t fid,
157+
qvi_rmi_rpc_fid_t fid,
151158
cstr_t picture
152159
) {
153160
qvi_rmi_msg_header hdr;
@@ -206,14 +213,13 @@ zmsg_send(
206213
zmq_msg_t *msg,
207214
int *bsent
208215
) {
209-
int qvrc = QV_SUCCESS;
210216
*bsent = zmq_msg_send(msg, zsock, 0);
211217
if (qvi_unlikely(*bsent == -1)) {
212218
zerr_msg("zmq_msg_send() failed", errno);
213-
qvrc = QV_ERR_RPC;
219+
zmq_msg_close(msg);
220+
return QV_ERR_RPC;
214221
}
215-
if (qvi_unlikely(qvrc != QV_SUCCESS)) zmq_msg_close(msg);
216-
return qvrc;
222+
return QV_SUCCESS;
217223
}
218224

219225
static inline int
@@ -243,7 +249,7 @@ template <typename... Types>
243249
static inline int
244250
rpc_pack(
245251
qvi_bbuff **buff,
246-
qvi_rmi_rpc_funid_t fid,
252+
qvi_rmi_rpc_fid_t fid,
247253
Types &&...args
248254
) {
249255
std::string picture;
@@ -297,24 +303,23 @@ template <typename... Types>
297303
static inline int
298304
rpc_req(
299305
void *zsock,
300-
qvi_rmi_rpc_funid_t fid,
306+
qvi_rmi_rpc_fid_t fid,
301307
Types &&...args
302308
) {
303-
int buffer_size = 0;
304-
305309
qvi_bbuff *buff = nullptr;
306310
int rc = rpc_pack(&buff, fid, std::forward<Types>(args)...);
307311
if (qvi_unlikely(rc != QV_SUCCESS)) {
308312
qvi_bbuff_delete(&buff);
309313
return rc;
310314
}
311-
zmq_msg_t msg;
312-
rc = zmsg_init_from_bbuff(buff, &msg);
313-
if (qvi_unlikely(rc != QV_SUCCESS)) goto out;
314315
// Cache buffer size here because our call to qvi_bbuff_size() after
315316
// zmsg_send() may be invalid because msg_free_byte_buffer_cb() may have
316317
// already been called.
317-
buffer_size = (int)buff->size();
318+
const int buffer_size = (int)buff->size();
319+
320+
zmq_msg_t msg;
321+
rc = zmsg_init_from_bbuff(buff, &msg);
322+
if (qvi_unlikely(rc != QV_SUCCESS)) goto out;
318323

319324
int nbytes_sent;
320325
rc = zmsg_send(zsock, &msg, &nbytes_sent);
@@ -351,6 +356,9 @@ qvi_rmi_client::qvi_rmi_client(void)
351356
// Create a new ZMQ context.
352357
m_zctx = zmq_ctx_new();
353358
if (qvi_unlikely(!m_zctx)) throw qvi_runtime_error();
359+
// Create the ZMQ socket used for communication with the server.
360+
m_zsock = zsocket_create(m_zctx, ZMQ_REQ);
361+
if (qvi_unlikely(!m_zsock)) throw qvi_runtime_error();
354362
}
355363

356364
qvi_rmi_client::~qvi_rmi_client(void)
@@ -366,27 +374,21 @@ qvi_rmi_client::hwloc(void)
366374
return m_config.hwloc;
367375
}
368376

369-
int
370-
qvi_rmi_client::m_hello(void)
371-
{
372-
const int rc = rpc_req(m_zsock, QVI_RMI_FID_HELLO, getpid());
373-
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
374-
375-
return rpc_rep(m_zsock, m_config.url, m_config.hwtopo_path);
376-
}
377-
378377
int
379378
qvi_rmi_client::connect(
380379
const std::string &url
381380
) {
382-
m_zsock = zsocket_create_and_connect(
383-
m_zctx, ZMQ_REQ, url.c_str()
384-
);
385-
if (qvi_unlikely(!m_zsock)) return QV_ERR_RPC;
386-
387-
int rc = m_hello();
381+
int rc = zsocket_connect(m_zsock, url.c_str());
388382
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
389383

384+
std::string hwtopo_path;
385+
rc = m_hello(hwtopo_path);
386+
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
387+
// Now that we have all the info we need,
388+
// finish populating the RMI config.
389+
m_config.url = url;
390+
m_config.hwtopo_path = hwtopo_path;
391+
// Now we can initialize and load our topology.
390392
rc = qvi_hwloc_topology_init(
391393
m_config.hwloc, m_config.hwtopo_path.c_str()
392394
);
@@ -396,8 +398,23 @@ qvi_rmi_client::connect(
396398
}
397399

398400
////////////////////////////////////////////////////////////////////////////////
399-
// Client-Side (Public) RPC Definitions
401+
// Client-Side RPC Definitions
400402
////////////////////////////////////////////////////////////////////////////////
403+
int
404+
qvi_rmi_client::m_hello(
405+
std::string &hwtopo_path
406+
) {
407+
int qvrc = rpc_req(m_zsock, QVI_RMI_FID_HELLO, qvi_gettid());
408+
if (qvi_unlikely(qvrc != QV_SUCCESS)) return qvrc;
409+
// Should be set by rpc_rep, so assume an error.
410+
int rpcrc = QV_ERR_RPC;
411+
qvrc = rpc_rep(m_zsock, &rpcrc, hwtopo_path);
412+
if (qvi_unlikely(qvrc != QV_SUCCESS)) {
413+
return qvrc;
414+
}
415+
return rpcrc;
416+
}
417+
401418
int
402419
qvi_rmi_client::get_cpubind(
403420
pid_t who,
@@ -584,9 +601,9 @@ qvi_rmi_server::s_rpc_hello(
584601
const int rc = qvi_bbuff_rmi_unpack(input, &whoisit);
585602
if (qvi_unlikely(rc != QV_SUCCESS)) return rc;
586603
// Pack relevant configuration information.
604+
const int rpcrc = QV_SUCCESS;
587605
return rpc_pack(
588-
output, hdr->fid,
589-
server->m_config.url,
606+
output, hdr->fid, rpcrc,
590607
server->m_config.hwtopo_path
591608
);
592609
}
@@ -831,11 +848,11 @@ qvi_rmi_server::m_rpc_dispatch(
831848
}
832849

833850
int
834-
qvi_rmi_server::m_go(void)
851+
qvi_rmi_server::m_execute_main_server_loop(void)
835852
{
836853
int rc, bsent;
837854
volatile int bsentt = 0;
838-
volatile std::atomic<bool> active{true};
855+
volatile bool active = true;
839856
do {
840857
zmq_msg_t mrx, mtx;
841858
rc = zmsg_recv(m_zsock, &mrx);
@@ -851,7 +868,7 @@ qvi_rmi_server::m_go(void)
851868
// Nice to understand messaging characteristics.
852869
qvi_log_debug("Server Sent {} bytes", bsentt);
853870
#else
854-
QVI_UNUSED(bsentt);
871+
qvi_unused(bsentt);
855872
#endif
856873
if (qvi_unlikely(rc != QV_SUCCESS && rc != QV_SUCCESS_SHUTDOWN)) {
857874
qvi_log_error("RX/TX loop exited with rc={} ({})", rc, qv_strerr(rc));
@@ -867,9 +884,6 @@ qvi_rmi_server::configure(
867884
return QV_SUCCESS;
868885
}
869886

870-
/**
871-
* Populates base hardware pool.
872-
*/
873887
int
874888
qvi_rmi_server::m_populate_base_hwpool(void)
875889
{
@@ -891,7 +905,7 @@ qvi_rmi_server::start(void)
891905
);
892906
if (qvi_unlikely(!m_zsock)) return QV_ERR_SYS;
893907
// Start the main service loop.
894-
return m_go();
908+
return m_execute_main_server_loop();
895909
}
896910

897911
/*

src/qvi-rmi.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ typedef enum {
3636
QVI_RMI_FID_GET_NOBJS_IN_CPUSET,
3737
QVI_RMI_FID_GET_DEVICE_IN_CPUSET,
3838
QVI_RMI_FID_GET_INTRINSIC_HWPOOL
39-
} qvi_rmi_rpc_funid_t;
39+
} qvi_rmi_rpc_fid_t;
4040

4141
/**
4242
* @note: The return value is used for operation status (e.g., was the internal
@@ -65,7 +65,7 @@ struct qvi_rmi_config {
6565
struct qvi_rmi_server {
6666
private:
6767
/** Maps function IDs to function pointers. */
68-
std::map<qvi_rmi_rpc_funid_t, qvi_rmi_rpc_fun_ptr_t> m_rpc_dispatch_table;
68+
std::map<qvi_rmi_rpc_fid_t, qvi_rmi_rpc_fun_ptr_t> m_rpc_dispatch_table;
6969
/** Server configuration. */
7070
qvi_rmi_config m_config;
7171
/** The base resource pool maintained by the server. */
@@ -97,7 +97,7 @@ struct qvi_rmi_server {
9797
);
9898
/** Executes the main server loop. */
9999
int
100-
m_go(void);
100+
m_execute_main_server_loop(void);
101101
/** */
102102
static int
103103
s_rpc_invalid(
@@ -206,7 +206,9 @@ struct qvi_rmi_client {
206206
void *m_zsock = nullptr;
207207
/** Performs connection handshake. */
208208
int
209-
m_hello(void);
209+
m_hello(
210+
std::string &hwtopo_path
211+
);
210212
public:
211213
/** Constructor. */
212214
qvi_rmi_client(void);

0 commit comments

Comments
 (0)