> > If you get down to it, the thing is about delegating control over part
> > of namespace to somebody, without letting them control, see, etc. the
> > rest of it. So I'd rather be very conservative about extra information
> > we allow to piggyback on that. I don't know... perhaps with stable peer
> > group IDs it would be OK to show peer group ID by (our) vfsmount + peer
> > group ID of master + peer group ID of nearest dominating group that has
> > intersection with our namespace. Then we don't leak information (AFAICS),
> > get full propagation information between our vfsmounts and cooperating
> > tasks in different namespaces can figure the things out as much as possible
> > without leaking 3rd-party information to either.
>
Here's a patch against current -mm implementing this (with some
cleanups thrown in). I've done some testing on it as well; it wasn't
entirely trivial to figure out a setup where propagation goes out of
the namespace first, then comes back in:
mount --bind /mnt1 /mnt1
mount --make-shared /mnt1
mount --bind /mnt2 /mnt2
mount --make-shared /mnt2
newns
mount --make-slave /mnt1
old ns:
mount --make-slave /mnt2
mount --bind /mnt1/tmp /mnt1/tmp
new ns:
mount --make-shared /mnt1/tmp
mount --bind /mnt1/tmp /mnt2/tmp
Voila.
Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>
---
Index: linux/fs/pnode.c
===================================================================
--- linux.orig/fs/pnode.c 2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.c 2008-02-22 15:27:26.000000000 +0100
@@ -9,8 +9,12 @@
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/idr.h>
#include "pnode.h"
+static DEFINE_SPINLOCK(mnt_pgid_lock);
+static DEFINE_IDA(mnt_pgid_ida);
+
/* return the next shared peer mount of @p */
static inline struct vfsmount *next_peer(struct vfsmount *p)
{
@@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav
return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
}
-static int __peer_group_id(struct vfsmount *mnt)
+static void __set_mnt_shared(struct vfsmount *mnt)
{
- struct vfsmount *m;
- int id = mnt->mnt_id;
+ mnt->mnt_flags &= ~MNT_PNODE_MASK;
+ mnt->mnt_flags |= MNT_SHARED;
+}
+
+void set_mnt_shared(struct vfsmount *mnt)
+{
+ int res;
- for (m = next_peer(mnt); m != mnt; m = next_peer(m))
- id = min(id, m->mnt_id);
+ retry:
+ spin_lock(&mnt_pgid_lock);
+ if (IS_MNT_SHARED(mnt)) {
+ spin_unlock(&mnt_pgid_lock);
+ return;
+ }
- return id;
+ res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid);
+ spin_unlock(&mnt_pgid_lock);
+ if (res == -EAGAIN) {
+ if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL))
+ goto retry;
+ }
+ __set_mnt_shared(mnt);
+}
+
+void clear_mnt_shared(struct vfsmount *mnt)
+{
+ if (IS_MNT_SHARED(mnt)) {
+ mnt->mnt_flags &= ~MNT_SHARED;
+ mnt->mnt_pgid = -1;
+ }
+}
+
+void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt)
+{
+ mnt->mnt_pgid = old->mnt_pgid;
+ list_add(&mnt->mnt_share, &old->mnt_share);
+ __set_mnt_shared(mnt);
}
-/* return the smallest ID within the peer group */
int get_peer_group_id(struct vfsmount *mnt)
{
+ return mnt->mnt_pgid;
+}
+
+int get_master_id(struct vfsmount *mnt)
+{
int id;
spin_lock(&vfsmount_lock);
- id = __peer_group_id(mnt);
+ id = get_peer_group_id(mnt->mnt_master);
spin_unlock(&vfsmount_lock);
return id;
}
-/* return the smallest ID within the master's peer group */
-int get_master_id(struct vfsmount *mnt)
+static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt,
+ struct mnt_namespace *ns)
{
- int id;
+ struct vfsmount *m = mnt;
+
+ do {
+ if (m->mnt_ns == ns)
+ return m;
+ m = next_peer(m);
+ } while (m != mnt);
+
+ return NULL;
+}
+
+int get_dominator_id_same_ns(struct vfsmount *mnt)
+{
+ int id = -1;
+ struct vfsmount *m;
spin_lock(&vfsmount_lock);
- id = __peer_group_id(mnt->mnt_master);
+ for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
+ struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns);
+ if (d) {
+ id = d->mnt_pgid;
+ break;
+ }
+ }
spin_unlock(&vfsmount_lock);
return id;
@@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount
if (peer_mnt == mnt)
peer_mnt = NULL;
}
- list_del_init(&mnt->mnt_share);
+ if (!list_empty(&mnt->mnt_share))
+ list_del_init(&mnt->mnt_share);
+ else if (IS_MNT_SHARED(mnt)) {
+ spin_lock(&mnt_pgid_lock);
+ ida_remove(&mnt_pgid_ida, mnt->mnt_pgid);
+ spin_unlock(&mnt_pgid_lock);
+ }
if (peer_mnt)
master = peer_mnt;
@@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount
list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
slave_mnt->mnt_master = master;
list_move(&mnt->mnt_slave, &master->mnt_slave_list);
- list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
+ list_splice_init(&mnt->mnt_slave_list,
+ master->mnt_slave_list.prev);
} else {
- struct list_head *p = &mnt->mnt_slave_list;
- while (!list_empty(p)) {
- slave_mnt = list_first_entry(p,
+ while (!list_empty(&mnt->mnt_slave_list)) {
+ slave_mnt = list_first_entry(&mnt->mnt_slave_list,
struct vfsmount, mnt_slave);
list_del_init(&slave_mnt->mnt_slave);
slave_mnt->mnt_master = NULL;
}
}
mnt->mnt_master = master;
- CLEAR_MNT_SHARED(mnt);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
+ clear_mnt_shared(mnt);
return 0;
}
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/namespace.c 2008-02-22 15:27:26.000000000 +0100
@@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char
return NULL;
}
+ mnt->mnt_pgid = -1;
atomic_set(&mnt->mnt_count, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
@@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct
if (flag & CL_SLAVE) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
mnt->mnt_master = old;
- CLEAR_MNT_SHARED(mnt);
+ clear_mnt_shared(mnt);
} else if (!(flag & CL_PRIVATE)) {
- if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
- list_add(&mnt->mnt_share, &old->mnt_share);
+ if (flag & CL_PROPAGATION)
+ set_mnt_shared(old);
+ if (IS_MNT_SHARED(old))
+ make_mnt_peer(old, mnt);
if (IS_MNT_SLAVE(old))
list_add(&mnt->mnt_slave, &old->mnt_slave);
mnt->mnt_master = old->mnt_master;
@@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil
show_sb_opts(m, sb);
if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt);
- if (IS_MNT_SHARED(mnt)) {
- seq_printf(m, " shared:%i", get_peer_group_id(mnt));
- if (IS_MNT_SLAVE(mnt))
- seq_printf(m, ",slave:%i", get_master_id(mnt));
- } else if (IS_MNT_SLAVE(mnt)) {
- seq_printf(m, " slave:%i", get_master_id(mnt));
+ seq_putc(m, ' ');
+ if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
+ if (IS_MNT_SHARED(mnt))
+ seq_printf(m, "shared:%i", get_peer_group_id(mnt));
+ if (IS_MNT_SLAVE(mnt)) {
+ int dominator_id = get_dominator_id_same_ns(mnt);
+
+ if (IS_MNT_SHARED(mnt))
+ seq_putc(m, ',');
+
+ seq_printf(m, "slave:%i", get_master_id(mnt));
+ if (dominator_id != -1)
+ seq_printf(m, ":%i", dominator_id);
+ }
} else if (IS_MNT_UNBINDABLE(mnt)) {
- seq_printf(m, " unbindable");
+ seq_printf(m, "unbindable");
} else {
- seq_printf(m, " private");
+ seq_printf(m, "private");
}
seq_putc(m, '\n');
return err;
Index: linux/fs/pnode.h
===================================================================
--- linux.orig/fs/pnode.h 2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.h 2008-02-22 15:27:26.000000000 +0100
@@ -14,7 +14,6 @@
#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
#define IS_MNT_NEW(mnt) (!mnt->mnt_ns)
-#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
#define CL_EXPIRE 0x01
@@ -24,12 +23,9 @@
#define CL_PROPAGATION 0x10
#define CL_PRIVATE 0x20
-static inline void set_mnt_shared(struct vfsmount *mnt)
-{
- mnt->mnt_flags &= ~MNT_PNODE_MASK;
- mnt->mnt_flags |= MNT_SHARED;
-}
-
+void set_mnt_shared(struct vfsmount *);
+void clear_mnt_shared(struct vfsmount *);
+void make_mnt_peer(struct vfsmount *, struct vfsmount *);
void change_mnt_propagation(struct vfsmount *, int);
int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
struct list_head *);
@@ -37,4 +33,5 @@ int propagate_umount(struct list_head *)
int propagate_mount_busy(struct vfsmount *, int);
int get_peer_group_id(struct vfsmount *);
int get_master_id(struct vfsmount *);
+int get_dominator_id_same_ns(struct vfsmount *);
#endif /* _LINUX_PNODE_H */
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h 2008-02-22 15:27:23.000000000 +0100
+++ linux/include/linux/mount.h 2008-02-22 15:27:26.000000000 +0100
@@ -57,6 +57,7 @@ struct vfsmount {
struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
int mnt_id; /* mount identifier */
+ int mnt_pgid; /* peer group identifier */
/*
* We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
* to let these frequently modified fields in a separate cache line
Index: linux/Documentation/filesystems/proc.txt
===================================================================
--- linux.orig/Documentation/filesystems/proc.txt 2008-02-22 15:27:23.000000000 +0100
+++ linux/Documentation/filesystems/proc.txt 2008-02-22 15:27:26.000000000 +0100
@@ -2367,21 +2367,20 @@ MNTOPTS: per mount options
SBOPTS: per super block options
PROPAGATION: propagation type
-propagation type: <propagation_flag>[:<mntid>][,...]
- note: 'shared' flag is followed by the mntid of its peer mount
- 'slave' flag is followed by the mntid of its master mount
+propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...]
+ note: 'shared' flag is followed by the id of this mount's peer group
+ 'slave' flag is followed by the peer group id of its master mount,
+ optionally followed by the id of the closest dominant(*)
+ peer group in the same namespace, if one exists.
'private' flag stands by itself
'unbindable' flag stands by itself
-The 'mntid' used in the propagation type is a canonical ID of the peer
-group (currently the smallest ID within the group is used for this
-purpose, but this should not be relied on). Since mounts can be added
-or removed from the peer group, this ID only guaranteed to stay the
-same on a static propagation tree.
+(*) A dominant peer group is an ancestor of this mount in the
+propagation tree, in other words, this mount receives propagation from
+the dominant peer group, but not the other way round.
For more information see:
Documentation/filesystems/sharedsubtree.txt
-
------------------------------------------------------------------------------
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html