
Commit 707ec62

Merge branch 'net-reduce-rtnl-pressure-in-unregister_netdevice'
Eric Dumazet says:

====================
net: reduce RTNL pressure in unregister_netdevice()

One major source of RTNL contention resides in unregister_netdevice().

Due to RCU protection of various network structures, and
unregister_netdevice() being a synchronous function, it calls
potentially slow functions while holding RTNL.

I think we can release RTNL at two points, so that three slow
functions are called while RTNL can be used by other threads.

v1: https://lore.kernel.org/netdev/[email protected]/T/#m398c95f5778e1ff70938e079d3c4c43c050ad2a6
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents bc1e64d + 83419b6 commit 707ec62
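The series hinges on a simple pattern: the single cleanup_net() worker records its task_struct, and unregister_netdevice_many_notify() drops RTNL around the slow synchronous steps only when it detects that caller, leaving every other caller's locking unchanged. Below is a minimal userspace sketch of that pattern, with a pthread mutex standing in for RTNL; all names are illustrative, not kernel APIs.

/* sketch.c: a designated worker thread may drop a coarse lock around
 * slow work and retake it afterwards; other callers keep holding it.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for RTNL */
static pthread_t cleanup_tid;                  /* stands in for cleanup_net_task */

static bool from_cleanup_worker(void)
{
        return pthread_equal(pthread_self(), cleanup_tid);
}

static void slow_operation(void)
{
        sleep(1); /* stands in for flush_all_backlogs() or an RCU grace period */
}

static void unregister_many(void) /* called with big_lock held */
{
        if (from_cleanup_worker())
                pthread_mutex_unlock(&big_lock); /* let other threads take the lock */

        slow_operation();

        if (from_cleanup_worker())
                pthread_mutex_lock(&big_lock);   /* restore the caller's lock state */
}

int main(void)
{
        cleanup_tid = pthread_self(); /* the sole thread allowed to drop the lock */
        pthread_mutex_lock(&big_lock);
        unregister_many();
        pthread_mutex_unlock(&big_lock);
        return 0;
}

Dropping the lock is only safe because the worker keeps its pending devices on a private list in the meantime, which is what net_todo_list_for_cleanup_net provides in the net/core/dev.c diff below.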

3 files changed, +82 -22 lines


Diff for: include/net/net_namespace.h

+2
@@ -210,6 +210,8 @@ void net_ns_barrier(void);
 
 struct ns_common *get_net_ns(struct ns_common *ns);
 struct net *get_net_ns_by_fd(int fd);
+extern struct task_struct *cleanup_net_task;
+
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>

Diff for: net/core/dev.c

+75 -22
@@ -6124,8 +6124,6 @@ void netif_receive_skb_list(struct list_head *head)
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
-static DEFINE_PER_CPU(struct work_struct, flush_works);
-
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
@@ -6182,36 +6180,54 @@ static bool flush_required(int cpu)
         return true;
 }
 
+struct flush_backlogs {
+        cpumask_t flush_cpus;
+        struct work_struct w[];
+};
+
+static struct flush_backlogs *flush_backlogs_alloc(void)
+{
+        return kmalloc(struct_size_t(struct flush_backlogs, w, nr_cpu_ids),
+                       GFP_KERNEL);
+}
+
+static struct flush_backlogs *flush_backlogs_fallback;
+static DEFINE_MUTEX(flush_backlogs_mutex);
+
 static void flush_all_backlogs(void)
 {
-        static cpumask_t flush_cpus;
+        struct flush_backlogs *ptr = flush_backlogs_alloc();
         unsigned int cpu;
 
-        /* since we are under rtnl lock protection we can use static data
-         * for the cpumask and avoid allocating on stack the possibly
-         * large mask
-         */
-        ASSERT_RTNL();
+        if (!ptr) {
+                mutex_lock(&flush_backlogs_mutex);
+                ptr = flush_backlogs_fallback;
+        }
+        cpumask_clear(&ptr->flush_cpus);
 
         cpus_read_lock();
 
-        cpumask_clear(&flush_cpus);
         for_each_online_cpu(cpu) {
                 if (flush_required(cpu)) {
-                        queue_work_on(cpu, system_highpri_wq,
-                                      per_cpu_ptr(&flush_works, cpu));
-                        cpumask_set_cpu(cpu, &flush_cpus);
+                        INIT_WORK(&ptr->w[cpu], flush_backlog);
+                        queue_work_on(cpu, system_highpri_wq, &ptr->w[cpu]);
+                        __cpumask_set_cpu(cpu, &ptr->flush_cpus);
                 }
         }
 
         /* we can have in flight packet[s] on the cpus we are not flushing,
          * synchronize_net() in unregister_netdevice_many() will take care of
-         * them
+         * them.
          */
-        for_each_cpu(cpu, &flush_cpus)
-                flush_work(per_cpu_ptr(&flush_works, cpu));
+        for_each_cpu(cpu, &ptr->flush_cpus)
+                flush_work(&ptr->w[cpu]);
 
         cpus_read_unlock();
+
+        if (ptr != flush_backlogs_fallback)
+                kfree(ptr);
+        else
+                mutex_unlock(&flush_backlogs_mutex);
 }
 
 static void net_rps_send_ipi(struct softnet_data *remsd)
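The new flush_backlogs structure above ends in a C99 flexible array member, so a single allocation carries the cpumask plus one work_struct per possible CPU; struct_size_t() computes that size with overflow protection. A rough standard-C analogue of the sizing, with illustrative names and plain malloc() in place of kmalloc():

#include <stdlib.h>
#include <string.h>

struct backlogs_demo {
        unsigned long cpus; /* toy stand-in for cpumask_t */
        int w[];            /* flexible array member: one slot per CPU */
};

static struct backlogs_demo *backlogs_demo_alloc(size_t nr_cpus)
{
        /* header plus nr_cpus trailing elements, the sum struct_size_t() yields */
        size_t sz = sizeof(struct backlogs_demo) + nr_cpus * sizeof(int);
        struct backlogs_demo *p = malloc(sz);

        if (p)
                memset(p, 0, sz);
        return p;
}

int main(void)
{
        struct backlogs_demo *p = backlogs_demo_alloc(8);

        free(p);
        return 0;
}

If the kmalloc() fails, flush_all_backlogs() falls back to flush_backlogs_fallback, a boot-time preallocated instance serialized by flush_backlogs_mutex; that is why net_dev_init() in the last hunk below allocates it up front and fails initialization if it cannot.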
@@ -10244,14 +10260,46 @@ static void dev_index_release(struct net *net, int ifindex)
         WARN_ON(xa_erase(&net->dev_by_index, ifindex));
 }
 
+static bool from_cleanup_net(void)
+{
+#ifdef CONFIG_NET_NS
+        return current == cleanup_net_task;
+#else
+        return false;
+#endif
+}
+
+static void rtnl_drop_if_cleanup_net(void)
+{
+        if (from_cleanup_net())
+                __rtnl_unlock();
+}
+
+static void rtnl_acquire_if_cleanup_net(void)
+{
+        if (from_cleanup_net())
+                rtnl_lock();
+}
+
 /* Delayed registration/unregisteration */
 LIST_HEAD(net_todo_list);
+static LIST_HEAD(net_todo_list_for_cleanup_net);
+
+/* TODO: net_todo_list/net_todo_list_for_cleanup_net should probably
+ * be provided by callers, instead of being static, rtnl protected.
+ */
+static struct list_head *todo_list(void)
+{
+        return from_cleanup_net() ? &net_todo_list_for_cleanup_net :
+                                    &net_todo_list;
+}
+
 DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 atomic_t dev_unreg_count = ATOMIC_INIT(0);
 
 static void net_set_todo(struct net_device *dev)
 {
-        list_add_tail(&dev->todo_list, &net_todo_list);
+        list_add_tail(&dev->todo_list, todo_list());
 }
 
 static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
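Why a second todo list: once cleanup_net() drops RTNL inside unregister_netdevice_many_notify(), other threads can slip in and queue their own unregistrations, and netdev_run_todo() must snapshot only the list belonging to the current caller. Routing by caller identity keeps the two flows from stealing each other's devices. The same selection in sketch form (userspace C, illustrative names, not kernel APIs):

#include <pthread.h>

struct todo { struct todo *next; };

static struct todo *todo_main;    /* other threads' pending devices */
static struct todo *todo_cleanup; /* the cleanup worker's pending devices */
static pthread_t cleanup_tid;

/* pick a pending list by caller identity, as todo_list() does above */
static struct todo **todo_list_for_caller(void)
{
        return pthread_equal(pthread_self(), cleanup_tid) ? &todo_cleanup
                                                          : &todo_main;
}

int main(void)
{
        cleanup_tid = pthread_self();
        return todo_list_for_caller() == &todo_cleanup ? 0 : 1;
}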
@@ -11101,7 +11149,7 @@ void netdev_run_todo(void)
 #endif
 
         /* Snapshot list, allow later requests */
-        list_replace_init(&net_todo_list, &list);
+        list_replace_init(todo_list(), &list);
 
         __rtnl_unlock();
 
@@ -11623,7 +11671,7 @@ EXPORT_SYMBOL_GPL(alloc_netdev_dummy);
 void synchronize_net(void)
 {
         might_sleep();
-        if (rtnl_is_locked())
+        if (from_cleanup_net() || rtnl_is_locked())
                 synchronize_rcu_expedited();
         else
                 synchronize_rcu();
@@ -11728,9 +11776,11 @@ void unregister_netdevice_many_notify(struct list_head *head,
                 WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
                 netdev_unlock(dev);
         }
-        flush_all_backlogs();
 
+        rtnl_drop_if_cleanup_net();
+        flush_all_backlogs();
         synchronize_net();
+        rtnl_acquire_if_cleanup_net();
 
         list_for_each_entry(dev, head, unreg_list) {
                 struct sk_buff *skb = NULL;
@@ -11790,7 +11840,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
 #endif
         }
 
+        rtnl_drop_if_cleanup_net();
         synchronize_net();
+        rtnl_acquire_if_cleanup_net();
 
         list_for_each_entry(dev, head, unreg_list) {
                 netdev_put(dev, &dev->dev_registered_tracker);
@@ -12455,12 +12507,13 @@ static int __init net_dev_init(void)
          * Initialise the packet receive queues.
          */
 
+        flush_backlogs_fallback = flush_backlogs_alloc();
+        if (!flush_backlogs_fallback)
+                goto out;
+
         for_each_possible_cpu(i) {
-                struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                 struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-                INIT_WORK(flush, flush_backlog);
-
                 skb_queue_head_init(&sd->input_pkt_queue);
                 skb_queue_head_init(&sd->process_queue);
 #ifdef CONFIG_XFRM_OFFLOAD

Diff for: net/core/net_namespace.c

+5
@@ -588,6 +588,8 @@ static void unhash_nsid(struct net *net, struct net *last)
 
 static LLIST_HEAD(cleanup_list);
 
+struct task_struct *cleanup_net_task;
+
 static void cleanup_net(struct work_struct *work)
 {
         const struct pernet_operations *ops;
@@ -596,6 +598,8 @@ static void cleanup_net(struct work_struct *work)
         LIST_HEAD(net_exit_list);
         LIST_HEAD(dev_kill_list);
 
+        cleanup_net_task = current;
+
         /* Atomically snapshot the list of namespaces to cleanup */
         net_kill_list = llist_del_all(&cleanup_list);

@@ -670,6 +674,7 @@ static void cleanup_net(struct work_struct *work)
                 put_user_ns(net->user_ns);
                 net_free(net);
         }
+        cleanup_net_task = NULL;
 }
 
 /**
