Skip to content

Commit 3df88c6

Browse files
committed
Merge branch 'work.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull ipc namespace update from Al Viro:
 "Rik's patches reducing the amount of synchronize_rcu() triggered by ipc namespace destruction.

  I've some pending stuff reducing that on the normal umount side, but it's nowhere near ready and Rik's stuff shouldn't be held back due to conflicts - I'll just redo the parts of my series that stray into ipc/*"

* 'work.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ipc,namespace: batch free ipc_namespace structures
  ipc,namespace: make ipc namespace allocation wait for pending free
2 parents 10cc5d4 + da27f79 commit 3df88c6

File tree

5 files changed

+41
-20
lines changed

5 files changed

+41
-20
lines changed

fs/namespace.c

+14-4
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,17 @@ struct vfsmount *mntget(struct vfsmount *mnt)
12831283
}
12841284
EXPORT_SYMBOL(mntget);
12851285

1286+
/*
1287+
* Make a mount point inaccessible to new lookups.
1288+
* Because there may still be current users, the caller MUST WAIT
1289+
* for an RCU grace period before destroying the mount point.
1290+
*/
1291+
void mnt_make_shortterm(struct vfsmount *mnt)
1292+
{
1293+
if (mnt)
1294+
real_mount(mnt)->mnt_ns = NULL;
1295+
}
1296+
12861297
/**
12871298
* path_is_mountpoint() - Check if path is a mount in the current namespace.
12881299
* @path: path to check
@@ -4459,8 +4470,8 @@ EXPORT_SYMBOL_GPL(kern_mount);
44594470
void kern_unmount(struct vfsmount *mnt)
44604471
{
44614472
/* release long term mount so mount point can be released */
4462-
if (!IS_ERR_OR_NULL(mnt)) {
4463-
real_mount(mnt)->mnt_ns = NULL;
4473+
if (!IS_ERR(mnt)) {
4474+
mnt_make_shortterm(mnt);
44644475
synchronize_rcu(); /* yecchhh... */
44654476
mntput(mnt);
44664477
}
@@ -4472,8 +4483,7 @@ void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
44724483
unsigned int i;
44734484

44744485
for (i = 0; i < num; i++)
4475-
if (mnt[i])
4476-
real_mount(mnt[i])->mnt_ns = NULL;
4486+
mnt_make_shortterm(mnt[i]);
44774487
synchronize_rcu_expedited();
44784488
for (i = 0; i < num; i++)
44794489
mntput(mnt[i]);

include/linux/mount.h

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ extern void mnt_drop_write(struct vfsmount *mnt);
8686
extern void mnt_drop_write_file(struct file *file);
8787
extern void mntput(struct vfsmount *mnt);
8888
extern struct vfsmount *mntget(struct vfsmount *mnt);
89+
extern void mnt_make_shortterm(struct vfsmount *mnt);
8990
extern struct vfsmount *mnt_clone_internal(const struct path *path);
9091
extern bool __mnt_is_readonly(struct vfsmount *mnt);
9192
extern bool mnt_may_suid(struct vfsmount *mnt);

ipc/mqueue.c

-5
Original file line numberDiff line numberDiff line change
@@ -1709,11 +1709,6 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
17091709
ns->mq_mnt->mnt_sb->s_fs_info = NULL;
17101710
}
17111711

1712-
void mq_put_mnt(struct ipc_namespace *ns)
1713-
{
1714-
kern_unmount(ns->mq_mnt);
1715-
}
1716-
17171712
static int __init init_mqueue_fs(void)
17181713
{
17191714
int error;

ipc/namespace.c

+26-9
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919

2020
#include "util.h"
2121

22+
/*
23+
* The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
24+
*/
25+
static void free_ipc(struct work_struct *unused);
26+
static DECLARE_WORK(free_ipc_work, free_ipc);
27+
2228
static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
2329
{
2430
return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
@@ -37,9 +43,18 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
3743
int err;
3844

3945
err = -ENOSPC;
46+
again:
4047
ucounts = inc_ipc_namespaces(user_ns);
41-
if (!ucounts)
48+
if (!ucounts) {
49+
/*
50+
* IPC namespaces are freed asynchronously, by free_ipc_work.
51+
* If frees were pending, flush_work will wait, and
52+
* return true. Fail the allocation if no frees are pending.
53+
*/
54+
if (flush_work(&free_ipc_work))
55+
goto again;
4256
goto fail;
57+
}
4358

4459
err = -ENOMEM;
4560
ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT);
@@ -130,10 +145,11 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
130145

131146
static void free_ipc_ns(struct ipc_namespace *ns)
132147
{
133-
/* mq_put_mnt() waits for a grace period as kern_unmount()
134-
* uses synchronize_rcu().
148+
/*
149+
* Caller needs to wait for an RCU grace period to have passed
150+
* after making the mount point inaccessible to new accesses.
135151
*/
136-
mq_put_mnt(ns);
152+
mntput(ns->mq_mnt);
137153
sem_exit_ns(ns);
138154
msg_exit_ns(ns);
139155
shm_exit_ns(ns);
@@ -153,15 +169,16 @@ static void free_ipc(struct work_struct *unused)
153169
struct llist_node *node = llist_del_all(&free_ipc_list);
154170
struct ipc_namespace *n, *t;
155171

172+
llist_for_each_entry_safe(n, t, node, mnt_llist)
173+
mnt_make_shortterm(n->mq_mnt);
174+
175+
/* Wait for any last users to have gone away. */
176+
synchronize_rcu();
177+
156178
llist_for_each_entry_safe(n, t, node, mnt_llist)
157179
free_ipc_ns(n);
158180
}
159181

160-
/*
161-
* The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
162-
*/
163-
static DECLARE_WORK(free_ipc_work, free_ipc);
164-
165182
/*
166183
* put_ipc_ns - drop a reference to an ipc namespace.
167184
* @ns: the namespace to put

ipc/util.h

-2
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,8 @@ struct pid_namespace;
5656

5757
#ifdef CONFIG_POSIX_MQUEUE
5858
extern void mq_clear_sbinfo(struct ipc_namespace *ns);
59-
extern void mq_put_mnt(struct ipc_namespace *ns);
6059
#else
6160
static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { }
62-
static inline void mq_put_mnt(struct ipc_namespace *ns) { }
6361
#endif
6462

6563
#ifdef CONFIG_SYSVIPC

0 commit comments

Comments
 (0)