Skip to content

Commit 72a813a

Browse files
committed
Merge branch 'mlxsw-new-reset-flow'
Petr Machata says: ==================== mlxsw: Add support for new reset flow Ido Schimmel writes: This patchset changes mlxsw to issue a PCI reset during probe and devlink reload so that the PCI firmware could be upgraded without a reboot. Unlike the old version of this patchset [1], in this version the driver no longer tries to issue a PCI reset by triggering a PCI link toggle on its own, but instead calls the PCI core to issue the reset. The PCI APIs require the device lock to be held which is why patches Patches #7 adds reset method quirk for NVIDIA Spectrum devices. Patch #8 adds a debug level print in PCI core so that device ready delay will be printed even if it is shorter than one second. Patches #9-#11 are straightforward preparations in mlxsw. Patch #12 finally implements the new reset flow in mlxsw. Patch #13 adds PCI reset handlers in mlxsw to avoid user space from resetting the device from underneath an unaware driver. Instead, the driver is gracefully de-initialized before the PCI reset and then initialized again after it. Patch #14 adds a PCI reset selftest to make sure this code path does not regress. [1] https://lore.kernel.org/netdev/[email protected]/ ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 4dce97b + af51d6b commit 72a813a

File tree

14 files changed

+247
-30
lines changed

14 files changed

+247
-30
lines changed

Documentation/netlink/specs/devlink.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1484,8 +1484,8 @@ operations:
14841484
dont-validate: [ strict ]
14851485
flags: [ admin-perm ]
14861486
do:
1487-
pre: devlink-nl-pre-doit
1488-
post: devlink-nl-post-doit
1487+
pre: devlink-nl-pre-doit-dev-lock
1488+
post: devlink-nl-post-doit-dev-lock
14891489
request:
14901490
attributes:
14911491
- bus-name

drivers/net/ethernet/mellanox/mlxsw/pci.c

+83-7
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct mlxsw_pci {
130130
const struct pci_device_id *id;
131131
enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
132132
u8 num_sdq_cqs; /* Number of CQs used for SDQs */
133+
bool skip_reset;
133134
};
134135

135136
static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -1476,11 +1477,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
14761477
return -EBUSY;
14771478
}
14781479

1479-
static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
1480-
const struct pci_device_id *id)
1480+
static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
14811481
{
14821482
struct pci_dev *pdev = mlxsw_pci->pdev;
14831483
char mrsr_pl[MLXSW_REG_MRSR_LEN];
1484+
int err;
1485+
1486+
mlxsw_reg_mrsr_pack(mrsr_pl,
1487+
MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
1488+
err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
1489+
if (err)
1490+
return err;
1491+
1492+
device_lock_assert(&pdev->dev);
1493+
1494+
pci_cfg_access_lock(pdev);
1495+
pci_save_state(pdev);
1496+
1497+
err = __pci_reset_function_locked(pdev);
1498+
if (err)
1499+
pci_err(pdev, "PCI function reset failed with %d\n", err);
1500+
1501+
pci_restore_state(pdev);
1502+
pci_cfg_access_unlock(pdev);
1503+
1504+
return err;
1505+
}
1506+
1507+
static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
1508+
{
1509+
char mrsr_pl[MLXSW_REG_MRSR_LEN];
1510+
1511+
mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET);
1512+
return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
1513+
}
1514+
1515+
static int
1516+
mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
1517+
{
1518+
struct pci_dev *pdev = mlxsw_pci->pdev;
1519+
char mcam_pl[MLXSW_REG_MCAM_LEN];
1520+
bool pci_reset_supported;
14841521
u32 sys_status;
14851522
int err;
14861523

@@ -1491,11 +1528,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
14911528
return err;
14921529
}
14931530

1494-
mlxsw_reg_mrsr_pack(mrsr_pl);
1495-
err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
1531+
/* PCI core already issued a PCI reset, do not issue another reset. */
1532+
if (mlxsw_pci->skip_reset)
1533+
return 0;
1534+
1535+
mlxsw_reg_mcam_pack(mcam_pl,
1536+
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
1537+
err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
14961538
if (err)
14971539
return err;
14981540

1541+
mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
1542+
&pci_reset_supported);
1543+
1544+
if (pci_reset_supported) {
1545+
pci_dbg(pdev, "Starting PCI reset flow\n");
1546+
err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
1547+
} else {
1548+
pci_dbg(pdev, "Starting software reset flow\n");
1549+
err = mlxsw_pci_reset_sw(mlxsw_pci);
1550+
}
1551+
14991552
err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
15001553
if (err) {
15011554
dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
@@ -1537,9 +1590,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
15371590
if (!mbox)
15381591
return -ENOMEM;
15391592

1540-
err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
1593+
err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
15411594
if (err)
1542-
goto err_sw_reset;
1595+
goto err_reset;
15431596

15441597
err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
15451598
if (err < 0) {
@@ -1672,7 +1725,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
16721725
err_query_fw:
16731726
mlxsw_pci_free_irq_vectors(mlxsw_pci);
16741727
err_alloc_irq:
1675-
err_sw_reset:
1728+
err_reset:
16761729
mbox_put:
16771730
mlxsw_cmd_mbox_free(mbox);
16781731
return err;
@@ -2059,11 +2112,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
20592112
kfree(mlxsw_pci);
20602113
}
20612114

2115+
static void mlxsw_pci_reset_prepare(struct pci_dev *pdev)
2116+
{
2117+
struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
2118+
2119+
mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
2120+
}
2121+
2122+
static void mlxsw_pci_reset_done(struct pci_dev *pdev)
2123+
{
2124+
struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
2125+
2126+
mlxsw_pci->skip_reset = true;
2127+
mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus,
2128+
mlxsw_pci, false, NULL, NULL);
2129+
mlxsw_pci->skip_reset = false;
2130+
}
2131+
2132+
static const struct pci_error_handlers mlxsw_pci_err_handler = {
2133+
.reset_prepare = mlxsw_pci_reset_prepare,
2134+
.reset_done = mlxsw_pci_reset_done,
2135+
};
2136+
20622137
int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
20632138
{
20642139
pci_driver->probe = mlxsw_pci_probe;
20652140
pci_driver->remove = mlxsw_pci_remove;
20662141
pci_driver->shutdown = mlxsw_pci_remove;
2142+
pci_driver->err_handler = &mlxsw_pci_err_handler;
20672143
return pci_register_driver(pci_driver);
20682144
}
20692145
EXPORT_SYMBOL(mlxsw_pci_driver_register);

drivers/net/ethernet/mellanox/mlxsw/reg.h

+14-2
Original file line numberDiff line numberDiff line change
@@ -10122,6 +10122,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,
1012210122

1012310123
MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
1012410124

10125+
enum mlxsw_reg_mrsr_command {
10126+
/* Switch soft reset, does not reset PCI firmware. */
10127+
MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1,
10128+
/* Reset will be done when PCI link will be disabled.
10129+
* This command will reset PCI firmware also.
10130+
*/
10131+
MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6,
10132+
};
10133+
1012510134
/* reg_mrsr_command
1012610135
* Reset/shutdown command
1012710136
* 0 - do nothing
@@ -10130,10 +10139,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
1013010139
*/
1013110140
MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);
1013210141

10133-
static inline void mlxsw_reg_mrsr_pack(char *payload)
10142+
static inline void mlxsw_reg_mrsr_pack(char *payload,
10143+
enum mlxsw_reg_mrsr_command command)
1013410144
{
1013510145
MLXSW_REG_ZERO(mrsr, payload);
10136-
mlxsw_reg_mrsr_command_set(payload, 1);
10146+
mlxsw_reg_mrsr_command_set(payload, command);
1013710147
}
1013810148

1013910149
/* MLCR - Management LED Control Register
@@ -10584,6 +10594,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
1058410594
enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
1058510595
/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
1058610596
MLXSW_REG_MCAM_MCIA_128B = 34,
10597+
/* If set, MRSR.command=6 is supported. */
10598+
MLXSW_REG_MCAM_PCI_RESET = 48,
1058710599
};
1058810600

1058910601
#define MLXSW_REG_BYTES_PER_DWORD 0x4

drivers/pci/pci.c

+3
Original file line numberDiff line numberDiff line change
@@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
12191219
if (delay > PCI_RESET_WAIT)
12201220
pci_info(dev, "ready %dms after %s\n", delay - 1,
12211221
reset_type);
1222+
else
1223+
pci_dbg(dev, "ready %dms after %s\n", delay - 1,
1224+
reset_type);
12221225

12231226
return 0;
12241227
}

drivers/pci/quirks.c

+13
Original file line numberDiff line numberDiff line change
@@ -3786,6 +3786,19 @@ static void quirk_no_pm_reset(struct pci_dev *dev)
37863786
DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
37873787
PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);
37883788

3789+
/*
3790+
* Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset
3791+
* (i.e., they advertise NoSoftRst-). However, this transition does not have
3792+
* any effect on the device: It continues to be operational and network ports
3793+
* remain up. Advertising this support makes it seem as if a PM reset is viable
3794+
* for these devices. Mark it as unavailable to skip it when testing reset
3795+
* methods.
3796+
*/
3797+
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset);
3798+
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset);
3799+
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset);
3800+
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset);
3801+
37893802
/*
37903803
* Thunderbolt controllers with broken MSI hotplug signaling:
37913804
* Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part

net/devlink/core.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
503503
* all devlink instances from this namespace into init_net.
504504
*/
505505
devlinks_xa_for_each_registered_get(net, index, devlink) {
506-
devl_lock(devlink);
506+
devl_dev_lock(devlink, true);
507507
err = 0;
508508
if (devl_is_registered(devlink))
509509
err = devlink_reload(devlink, &init_net,
510510
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
511511
DEVLINK_RELOAD_LIMIT_UNSPEC,
512512
&actions_performed, NULL);
513-
devl_unlock(devlink);
513+
devl_dev_unlock(devlink, true);
514514
devlink_put(devlink);
515515
if (err && err != -EOPNOTSUPP)
516516
pr_warn("Failed to reload devlink instance into init_net\n");

net/devlink/dev.c

+8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* Copyright (c) 2016 Jiri Pirko <[email protected]>
55
*/
66

7+
#include <linux/device.h>
78
#include <net/genetlink.h>
89
#include <net/sock.h>
910
#include "devl_internal.h"
@@ -433,6 +434,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
433434
struct net *curr_net;
434435
int err;
435436

437+
/* Make sure the reload operations are invoked with the device lock
438+
* held to allow drivers to trigger functionality that expects it
439+
* (e.g., PCI reset) and to close possible races between these
440+
* operations and probe/remove.
441+
*/
442+
device_lock_assert(devlink->dev);
443+
436444
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
437445
sizeof(remote_reload_stats));
438446

net/devlink/devl_internal.h

+17-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2016 Jiri Pirko <[email protected]>
44
*/
55

6+
#include <linux/device.h>
67
#include <linux/etherdevice.h>
78
#include <linux/mutex.h>
89
#include <linux/netdevice.h>
@@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
9697
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
9798
}
9899

100+
static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
101+
{
102+
if (dev_lock)
103+
device_lock(devlink->dev);
104+
devl_lock(devlink);
105+
}
106+
107+
static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
108+
{
109+
devl_unlock(devlink);
110+
if (dev_lock)
111+
device_unlock(devlink->dev);
112+
}
113+
99114
typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
100115
typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
101116
u32 rel_index);
@@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
111126
bool *msg_updated);
112127

113128
/* Netlink */
114-
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
115-
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
116-
117129
enum devlink_multicast_groups {
118130
DEVLINK_MCGRP_CONFIG,
119131
};
@@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
140152
int flags);
141153

142154
struct devlink *
143-
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
155+
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
156+
bool dev_lock);
144157

145158
int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
146159
devlink_nl_dump_one_func_t *dump_one);

net/devlink/health.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
11511151
struct nlattr **attrs = info->attrs;
11521152
struct devlink *devlink;
11531153

1154-
devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
1154+
devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
1155+
false);
11551156
if (IS_ERR(devlink))
11561157
return NULL;
11571158

0 commit comments

Comments
 (0)