On Mon, Oct 22, 2018 at 02:29:19PM +0200, Pierre-Elliott Bécue wrote: > Cc-ing Christian as he proposed the patch. > > Le jeudi 18 octobre 2018 à 14:32:12+0200, Matthias Heinz a écrit : > > Hi, > > > > I've tried applying the patch to the 2.0.9 source. Unfortunately there have > > been more changes to conf.c since the release of lxc 2.0.9 and this patch > > wont > > apply without applying these changes as well.
So, let me make your life easier by appending a patch on top of 2.0.9. :) Christian > > What amount of patches would it represent? Would it be relevant to consider > updating lxc in buster to a newer release? > > Cheers, > > -- > Pierre-Elliott Bécue > GPG: 9AE0 4D98 6400 E3B6 7528 F493 0D44 2664 1949 74E2 > It's far easier to fight for one's principles than to live up to them.
>From 8991fdfa9401da4803fb932786b449d3f32ca07c Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Mon, 22 Oct 2018 16:30:49 +0200 Subject: [PATCH] autodev: adapt to changes in Linux 4.18 Starting with commit 55956b59df33 ("vfs: Allow userns root to call mknod on owned filesystems.") Linux will allow mknod() in user namespaces for userns root if CAP_MKNOD is available. However, these device nodes are useless since static struct super_block *alloc_super(struct file_system_type *type, int flags, struct user_namespace *user_ns) { /* <snip> */ if (s->s_user_ns != &init_user_ns) s->s_iflags |= SB_I_NODEV; /* <snip> */ } will set the SB_I_NODEV flag on the filesystem. When a device node created in non-init userns is open()ed the call chain will hit: bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } which will cause an EPERM because the device node is located on an fs owned by non-init-userns and thus doesn't grant access to device nodes due to SB_I_NODEV. The solution is straightforward. Unless you're real root you should bind-mount device nodes. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- src/lxc/conf.c | 127 +++++++++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 46 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 91816beb..384138ec 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -1130,32 +1130,41 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, return 0; } -struct lxc_devs { +struct lxc_device_node { const char *name; - mode_t mode; - int maj; - int min; + const mode_t mode; + const int maj; + const int min; }; -static const struct lxc_devs lxc_devs[] = { - { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 }, - { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 }, +static const struct lxc_device_node lxc_devices[] = { { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 }, - { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 }, + { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 }, { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 }, { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 }, + { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 }, + { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 }, +}; + + + +enum { + LXC_DEVNODE_BIND, + LXC_DEVNODE_MKNOD, + LXC_DEVNODE_PARTIAL, + LXC_DEVNODE_OPEN, }; static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) { - int ret; - char path[MAXPATHLEN]; - int i; + int i, ret; + char path[PATH_MAX]; mode_t cmask; + int use_mknod = LXC_DEVNODE_MKNOD; - ret = snprintf(path, MAXPATHLEN, "%s/dev", + ret = snprintf(path, PATH_MAX, "%s/dev", rootfs->path ? rootfs->mount : ""); - if (ret < 0 || ret >= MAXPATHLEN) + if (ret < 0 || ret >= PATH_MAX) return -1; /* ignore, just don't try to fill in */ @@ -1165,53 +1174,79 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) INFO("Populating \"/dev\""); cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH); - for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) { - const struct lxc_devs *d = &lxc_devs[i]; + for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) { + char hostpath[PATH_MAX]; + const struct lxc_device_node *device = &lxc_devices[i]; - ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", - rootfs->path ? rootfs->mount : "", d->name); - if (ret < 0 || ret >= MAXPATHLEN) + ret = snprintf(path, PATH_MAX, "%s/dev/%s", + rootfs->path ? rootfs->mount : "", device->name); + if (ret < 0 || ret >= PATH_MAX) return -1; - ret = mknod(path, d->mode, makedev(d->maj, d->min)); - if (ret < 0) { - FILE *pathfile; - char hostpath[MAXPATHLEN]; + if (use_mknod >= LXC_DEVNODE_MKNOD) { + ret = mknod(path, device->mode, makedev(device->maj, device->min)); + if (ret == 0 || (ret < 0 && errno == EEXIST)) { + DEBUG("Created device node \"%s\"", path); + } else if (ret < 0) { + if (errno != EPERM) { + SYSERROR("Failed to create device node \"%s\"", path); + return -1; + } + + use_mknod = LXC_DEVNODE_BIND; + } - if (errno == EEXIST) { - DEBUG("\"%s\" device already existed", path); + /* Device nodes are fully useable. */ + if (use_mknod == LXC_DEVNODE_OPEN) continue; + + if (use_mknod == LXC_DEVNODE_MKNOD) { + /* See + * - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=55956b59df336f6738da916dbb520b6e37df9fbd + * - https://lists.linuxfoundation.org/pipermail/containers/2018-June/039176.html + */ + ret = open(path, O_RDONLY | O_CLOEXEC); + if (ret >= 0) { + close(ret); + /* Device nodes are fully useable. */ + use_mknod = LXC_DEVNODE_OPEN; + continue; + } + + TRACE("Failed to open \"%s\" device", path); + /* Device nodes are only partially useable. */ + use_mknod = LXC_DEVNODE_PARTIAL; } + } - /* Unprivileged containers cannot create devices, so - * bind mount the device from the host. + if (use_mknod != LXC_DEVNODE_PARTIAL) { + /* If we are dealing with partially functional device + * nodes the prio mknod() call will have created the + * device node so we can use it as a bind-mount target. */ - ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name); - if (ret < 0 || ret >= MAXPATHLEN) - return -1; - - pathfile = fopen(path, "wb"); - if (!pathfile) { + ret = mknod(path, S_IFREG | 0000, 0); + if (ret < 0 && errno != EEXIST) { SYSERROR("Failed to create file \"%s\"", path); return -1; } - fclose(pathfile); + } - ret = safe_mount(hostpath, path, 0, MS_BIND, NULL, - rootfs->path ? rootfs->mount : NULL); - if (ret < 0) { - SYSERROR("Failed to bind mount \"%s\" from " - "host into container", - d->name); - return -1; - } - DEBUG("Bind mounted \"%s\" onto \"%s\"", hostpath, - path); - } else { - DEBUG("Created device node \"%s\"", path); + /* Fallback to bind-mounting the device from the host. */ + ret = snprintf(hostpath, PATH_MAX, "/dev/%s", device->name); + if (ret < 0 || ret >= PATH_MAX) + return -1; + + ret = safe_mount(hostpath, path, 0, MS_BIND, NULL, + rootfs->path ? rootfs->mount : NULL); + if (ret < 0) { + SYSERROR("Failed to bind mount host device node \"%s\" " + "onto \"%s\"", hostpath, path); + return -1; } + DEBUG("Bind mounted host device node \"%s\" onto \"%s\"", + hostpath, path); } - umask(cmask); + (void)umask(cmask); INFO("Populated \"/dev\""); return 0; -- 2.19.1