Package: release.debian.org
Severity: normal
User: release.debian....@packages.debian.org
Usertags: unblock
X-Debbugs-Cc: z...@debian.org

Please unblock package runc

[ Reason ]
Fix CVE-2021-30465
https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r

[ Impact ]
The package can migrate on its own (it has autopkgtests and is not a key
package), but I'd like to reduce the required age.

[ Tests ]
I have done some basic tests, but I'm not sure how to trigger the security
issue, so I can't verify whether it's really fixed.

[ Risks ]
The patch provided by upstream can't be applied cleanly to the version we have
in sid, so I looked through the changes and backported two more PRs, which
makes the diff a bit large.

[ Checklist ]
  [x] all changes are documented in the d/changelog
  [x] I reviewed all changes and I approve them
  [*] attach debdiff against the package in testing
      Since only patches are added in the debian/patches dir, I have attached
      just the 3 new patches.

[ Other info ]

unblock runc/1.0.0~rc93+ds1-4


debian/changelog:

 runc (1.0.0~rc93+ds1-4) unstable; urgency=high
 .
   * Team upload.
   * Backport patches for CVE-2021-30465 (Closes: #988768)
     To apply CVE-2021-30465 patch clearly, following PR are backported as
     well:
     + https://github.com/opencontainers/runc/pull/2798
     + https://github.com/opencontainers/runc/pull/2818

$ cat debian/patches/00{11,12,13}*|filterdiff -x '*.bats' -x '*_test.go' | diffstat

 b/libcontainer/container_linux.go  |   10 +
 b/libcontainer/init_linux.go       |    1
 b/libcontainer/rootfs_linux.go     |   64 ++++++---
 b/libcontainer/specconv/example.go |   18 +-
 b/libcontainer/utils/utils.go      |   54 +++++++
 libcontainer/container_linux.go    |   52 ++++++-
 libcontainer/rootfs_linux.go       |  251 ++++++++++++++++++-------------------
 7 files changed, 283 insertions(+), 167 deletions(-)

$ cat debian/patches/00{11,12,13}*|filterdiff -x '*.bats' -x '*_test.go'

From: Kir Kolyshkin <kolysh...@gmail.com>
Date: Tue, 23 Feb 2021 17:58:07 -0800
Subject: PR2818 Fix cgroup2 mount for rootless case

Backport this PR so we can apply the patch for CVE-2021-30465

1. libct/newInitConfig: nit
2. libct/rootfs: introduce and use mountConfig
3. libct/rootfs/mountCgroupV2: minor refactor
4. Fix cgroup2 mount for rootless case
5. tests/int: use bfq test with rootless
6. tests/int: add a case for cgroupv2 mount

Origin: backport, https://github.com/opencontainers/runc/pull/2818
---
 libcontainer/container_linux.go  | 10 +++++--
 libcontainer/init_linux.go       |  1 +
 libcontainer/rootfs_linux.go     | 64 ++++++++++++++++++++++++++++------------
 libcontainer/specconv/example.go | 18 +++++------
 tests/integration/cgroups.bats   | 34 +++++++++++++++++++++
 5 files changed, 96 insertions(+), 31 deletions(-)

diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 3dca29e..1cbc734 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -594,6 +594,9 @@ func (c *linuxContainer) newInitConfig(process *Process) 
*initConfig {
                AppArmorProfile:  c.config.AppArmorProfile,
                ProcessLabel:     c.config.ProcessLabel,
                Rlimits:          c.config.Rlimits,
+               CreateConsole:    process.ConsoleSocket != nil,
+               ConsoleWidth:     process.ConsoleWidth,
+               ConsoleHeight:    process.ConsoleHeight,
        }
        if process.NoNewPrivileges != nil {
                cfg.NoNewPrivileges = *process.NoNewPrivileges
@@ -607,9 +610,10 @@ func (c *linuxContainer) newInitConfig(process *Process) 
*initConfig {
        if len(process.Rlimits) > 0 {
                cfg.Rlimits = process.Rlimits
        }
-       cfg.CreateConsole = process.ConsoleSocket != nil
-       cfg.ConsoleWidth = process.ConsoleWidth
-       cfg.ConsoleHeight = process.ConsoleHeight
+       if cgroups.IsCgroup2UnifiedMode() {
+               cfg.Cgroup2Path = c.cgroupManager.Path("")
+       }
+
        return cfg
 }
 
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index c57af0e..6817970 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -70,6 +70,7 @@ type initConfig struct {
        RootlessEUID     bool                  `json:"rootless_euid,omitempty"`
        RootlessCgroups  bool                  
`json:"rootless_cgroups,omitempty"`
        SpecState        *specs.State          `json:"spec_state,omitempty"`
+       Cgroup2Path      string                `json:"cgroup2_path,omitempty"`
 }
 
 type initer interface {
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 411496a..5d2d74c 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -17,6 +17,7 @@ import (
        "github.com/moby/sys/mountinfo"
        "github.com/mrunalp/fileutils"
        "github.com/opencontainers/runc/libcontainer/cgroups"
+       "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
        "github.com/opencontainers/runc/libcontainer/configs"
        "github.com/opencontainers/runc/libcontainer/devices"
        "github.com/opencontainers/runc/libcontainer/system"
@@ -29,6 +30,14 @@ import (
 
 const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
 
+type mountConfig struct {
+       root            string
+       label           string
+       cgroup2Path     string
+       rootlessCgroups bool
+       cgroupns        bool
+}
+
 // needsSetupDev returns true if /dev needs to be set up.
 func needsSetupDev(config *configs.Config) bool {
        for _, m := range config.Mounts {
@@ -48,7 +57,13 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) 
(err error) {
                return newSystemErrorWithCause(err, "preparing rootfs")
        }
 
-       hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
+       mountConfig := &mountConfig{
+               root:            config.Rootfs,
+               label:           config.MountLabel,
+               cgroup2Path:     iConfig.Cgroup2Path,
+               rootlessCgroups: iConfig.RootlessCgroups,
+               cgroupns:        config.Namespaces.Contains(configs.NEWCGROUP),
+       }
        setupDev := needsSetupDev(config)
        for _, m := range config.Mounts {
                for _, precmd := range m.PremountCmds {
@@ -56,7 +71,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) 
(err error) {
                                return newSystemErrorWithCause(err, "running 
premount command")
                        }
                }
-               if err := mountToRootfs(m, config.Rootfs, config.MountLabel, 
hasCgroupns); err != nil {
+               if err := mountToRootfs(m, mountConfig); err != nil {
                        return newSystemErrorWithCausef(err, "mounting %q to 
rootfs at %q", m.Source, m.Destination)
                }
 
@@ -222,7 +237,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) 
error {
        return nil
 }
 
-func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns 
bool) error {
+func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
        binds, err := getCgroupMounts(m)
        if err != nil {
                return err
@@ -242,12 +257,12 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel 
string, enableCgroupns b
                Data:             "mode=755",
                PropagationFlags: m.PropagationFlags,
        }
-       if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err 
!= nil {
+       if err := mountToRootfs(tmpfs, c); err != nil {
                return err
        }
        for _, b := range binds {
-               if enableCgroupns {
-                       subsystemPath := filepath.Join(rootfs, b.Destination)
+               if c.cgroupns {
+                       subsystemPath := filepath.Join(c.root, b.Destination)
                        if err := os.MkdirAll(subsystemPath, 0755); err != nil {
                                return err
                        }
@@ -266,7 +281,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel 
string, enableCgroupns b
                                return err
                        }
                } else {
-                       if err := mountToRootfs(b, rootfs, mountLabel, 
enableCgroupns); err != nil {
+                       if err := mountToRootfs(b, c); err != nil {
                                return err
                        }
                }
@@ -276,7 +291,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel 
string, enableCgroupns b
                        // symlink(2) is very dumb, it will just shove the path 
into
                        // the link and doesn't do any checks or relative path
                        // conversion. Also, don't error out if the cgroup 
already exists.
-                       if err := os.Symlink(mc, filepath.Join(rootfs, 
m.Destination, ss)); err != nil && !os.IsExist(err) {
+                       if err := os.Symlink(mc, filepath.Join(c.root, 
m.Destination, ss)); err != nil && !os.IsExist(err) {
                                return err
                        }
                }
@@ -284,28 +299,39 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel 
string, enableCgroupns b
        return nil
 }
 
-func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns 
bool) error {
-       cgroupPath, err := securejoin.SecureJoin(rootfs, m.Destination)
+func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
+       dest, err := securejoin.SecureJoin(c.root, m.Destination)
        if err != nil {
                return err
        }
-       if err := os.MkdirAll(cgroupPath, 0755); err != nil {
+       if err := os.MkdirAll(dest, 0755); err != nil {
                return err
        }
-       if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), 
m.Data); err != nil {
+       if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), 
m.Data); err != nil {
                // when we are in UserNS but CgroupNS is not unshared, we 
cannot mount cgroup2 (#2158)
                if err == unix.EPERM || err == unix.EBUSY {
-                       return unix.Mount("/sys/fs/cgroup", cgroupPath, "", 
uintptr(m.Flags)|unix.MS_BIND, "")
+                       src := fs2.UnifiedMountpoint
+                       if c.cgroupns && c.cgroup2Path != "" {
+                               // Emulate cgroupns by bind-mounting
+                               // the container cgroup path rather than
+                               // the whole /sys/fs/cgroup.
+                               src = c.cgroup2Path
+                       }
+                       err = unix.Mount(src, dest, "", 
uintptr(m.Flags)|unix.MS_BIND, "")
+                       if err == unix.ENOENT && c.rootlessCgroups {
+                               err = nil
+                       }
+                       return err
                }
                return err
        }
        return nil
 }
 
-func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns 
bool) error {
-       var (
-               dest = m.Destination
-       )
+func mountToRootfs(m *configs.Mount, c *mountConfig) error {
+       rootfs := c.root
+       mountLabel := c.label
+       dest := m.Destination
        if !strings.HasPrefix(dest, rootfs) {
                dest = filepath.Join(rootfs, dest)
        }
@@ -424,9 +450,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel 
string, enableCgroupns b
                }
        case "cgroup":
                if cgroups.IsCgroup2UnifiedMode() {
-                       return mountCgroupV2(m, rootfs, mountLabel, 
enableCgroupns)
+                       return mountCgroupV2(m, c)
                }
-               return mountCgroupV1(m, rootfs, mountLabel, enableCgroupns)
+               return mountCgroupV1(m, c)
        default:
                // ensure that the destination of the mount is resolved of 
symlinks at mount time because
                // any previous mounts can invalidate the next mount's 
destination.
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
index 8a201bc..56bab3b 100644
--- a/libcontainer/specconv/example.go
+++ b/libcontainer/specconv/example.go
@@ -2,6 +2,7 @@ package specconv
 
 import (
        "os"
+       "path/filepath"
        "strings"
 
        "github.com/opencontainers/runc/libcontainer/cgroups"
@@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
        // Fix up mounts.
        var mounts []specs.Mount
        for _, mount := range spec.Mounts {
-               // Ignore all mounts that are under /sys.
-               if strings.HasPrefix(mount.Destination, "/sys") {
+               // Replace the /sys mount with an rbind.
+               if filepath.Clean(mount.Destination) == "/sys" {
+                       mounts = append(mounts, specs.Mount{
+                               Source:      "/sys",
+                               Destination: "/sys",
+                               Type:        "none",
+                               Options:     []string{"rbind", "nosuid", 
"noexec", "nodev", "ro"},
+                       })
                        continue
                }
 
@@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
                mount.Options = options
                mounts = append(mounts, mount)
        }
-       // Add the sysfs mount as an rbind.
-       mounts = append(mounts, specs.Mount{
-               Source:      "/sys",
-               Destination: "/sys",
-               Type:        "none",
-               Options:     []string{"rbind", "nosuid", "noexec", "nodev", 
"ro"},
-       })
        spec.Mounts = mounts
 
        // Remove cgroup settings.
From: Adrian Reber <are...@redhat.com>
Date: Mon, 8 Feb 2021 13:05:54 +0000
Subject: PR2798 Correctly restore containers with nested bind mounts

Backport this PR so we can apply the patch for CVE-2021-30465

1. Re-create mountpoints during restore
2. tests: test nested bind mount restore

Origin: backport, https://github.com/opencontainers/runc/pull/2798
---
 libcontainer/container_linux.go   | 27 +++++++++++++++++++++++++
 tests/integration/checkpoint.bats | 42 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 1cbc734..76a69af 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -1239,11 +1239,38 @@ func (c *linuxContainer) 
prepareCriuRestoreMounts(mounts []*configs.Mount) error
        // Now go through all mounts and create the mountpoints
        // if the mountpoints are not on a tmpfs, as CRIU will
        // restore the complete tmpfs content from its checkpoint.
+       umounts := []string{}
+       defer func() {
+               for _, u := range umounts {
+                       if e := unix.Unmount(u, unix.MNT_DETACH); e != nil {
+                               if e != unix.EINVAL {
+                                       // Ignore EINVAL as it means 'target is 
not a mount point.'
+                                       // It probably has already been 
unmounted.
+                                       logrus.Warnf("Error during cleanup 
unmounting of %q (%v)", u, e)
+                               }
+                       }
+               }
+       }()
        for _, m := range mounts {
                if !isPathInPrefixList(m.Destination, tmpfs) {
                        if err := c.makeCriuRestoreMountpoints(m); err != nil {
                                return err
                        }
+                       // If the mount point is a bind mount, we need to mount
+                       // it now so that runc can create the necessary mount
+                       // points for mounts in bind mounts.
+                       // This also happens during initial container creation.
+                       // Without this CRIU restore will fail
+                       // See: 
https://github.com/opencontainers/runc/issues/2748
+                       // It is also not necessary to order the mount points
+                       // because during initial container creation mounts are
+                       // set up in the order they are configured.
+                       if m.Device == "bind" {
+                               if err := unix.Mount(m.Source, m.Destination, 
"", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+                                       return errorsf.Wrapf(err, "unable to 
bind mount %q to %q", m.Source, m.Destination)
+                               }
+                               umounts = append(umounts, m.Destination)
+                       }
                }
        }
        return nil
From: Aleksa Sarai <cyp...@cyphar.com>
Date: Thu, 1 Apr 2021 12:00:31 -0700
Subject: CVE-2021-30465

Origin: backport, 
https://github.com/opencontainers/runc/commit/0ca91f44f1664da834bc61115a849b56d22f595f
---
 libcontainer/container_linux.go  |  25 ++--
 libcontainer/rootfs_linux.go     | 251 +++++++++++++++++++--------------------
 libcontainer/utils/utils.go      |  54 +++++++++
 libcontainer/utils/utils_test.go |  35 ++++++
 4 files changed, 229 insertions(+), 136 deletions(-)

diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 76a69af..309b02a 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -1202,7 +1202,6 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m 
*configs.Mount) error {
                if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil 
{
                        return err
                }
-               m.Destination = dest
                if err := os.MkdirAll(dest, 0755); err != nil {
                        return err
                }
@@ -1242,13 +1241,16 @@ func (c *linuxContainer) 
prepareCriuRestoreMounts(mounts []*configs.Mount) error
        umounts := []string{}
        defer func() {
                for _, u := range umounts {
-                       if e := unix.Unmount(u, unix.MNT_DETACH); e != nil {
-                               if e != unix.EINVAL {
-                                       // Ignore EINVAL as it means 'target is 
not a mount point.'
-                                       // It probably has already been 
unmounted.
-                                       logrus.Warnf("Error during cleanup 
unmounting of %q (%v)", u, e)
+                       _ = utils.WithProcfd(c.config.Rootfs, u, func(procfd 
string) error {
+                               if e := unix.Unmount(procfd, unix.MNT_DETACH); 
e != nil {
+                                       if e != unix.EINVAL {
+                                               // Ignore EINVAL as it means 
'target is not a mount point.'
+                                               // It probably has already been 
unmounted.
+                                               logrus.Warnf("Error during 
cleanup unmounting of %s (%s): %v", procfd, u, e)
+                                       }
                                }
-                       }
+                               return nil
+                       })
                }
        }()
        for _, m := range mounts {
@@ -1266,8 +1268,13 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts 
[]*configs.Mount) error
                        // because during initial container creation mounts are
                        // set up in the order they are configured.
                        if m.Device == "bind" {
-                               if err := unix.Mount(m.Source, m.Destination, 
"", unix.MS_BIND|unix.MS_REC, ""); err != nil {
-                                       return errorsf.Wrapf(err, "unable to 
bind mount %q to %q", m.Source, m.Destination)
+                               if err := utils.WithProcfd(c.config.Rootfs, 
m.Destination, func(procfd string) error {
+                                       if err := unix.Mount(m.Source, procfd, 
"", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+                                               return errorsf.Wrapf(err, 
"unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
+                                       }
+                                       return nil
+                               }); err != nil {
+                                       return err
                                }
                                umounts = append(umounts, m.Destination)
                        }
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 5d2d74c..96be669 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -25,6 +25,7 @@ import (
        libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
        "github.com/opencontainers/runtime-spec/specs-go"
        "github.com/opencontainers/selinux/go-selinux/label"
+       "github.com/sirupsen/logrus"
        "golang.org/x/sys/unix"
 )
 
@@ -228,8 +229,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) 
error {
        if err := checkProcMount(rootfs, dest, m.Source); err != nil {
                return err
        }
-       // update the mount with the correct dest after symlinks are resolved.
-       m.Destination = dest
        if err := createIfNotExists(dest, stat.IsDir()); err != nil {
                return err
        }
@@ -266,18 +265,21 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) 
error {
                        if err := os.MkdirAll(subsystemPath, 0755); err != nil {
                                return err
                        }
-                       flags := defaultMountFlags
-                       if m.Flags&unix.MS_RDONLY != 0 {
-                               flags = flags | unix.MS_RDONLY
-                       }
-                       cgroupmount := &configs.Mount{
-                               Source:      "cgroup",
-                               Device:      "cgroup", // this is actually 
fstype
-                               Destination: subsystemPath,
-                               Flags:       flags,
-                               Data:        filepath.Base(subsystemPath),
-                       }
-                       if err := mountNewCgroup(cgroupmount); err != nil {
+                       if err := utils.WithProcfd(c.root, b.Destination, 
func(procfd string) error {
+                               flags := defaultMountFlags
+                               if m.Flags&unix.MS_RDONLY != 0 {
+                                       flags = flags | unix.MS_RDONLY
+                               }
+                               var (
+                                       source = "cgroup"
+                                       data   = filepath.Base(subsystemPath)
+                               )
+                               if data == "systemd" {
+                                       data = cgroups.CgroupNamePrefix + data
+                                       source = "systemd"
+                               }
+                               return unix.Mount(source, procfd, "cgroup", 
uintptr(flags), data)
+                       }); err != nil {
                                return err
                        }
                } else {
@@ -307,33 +309,79 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) 
error {
        if err := os.MkdirAll(dest, 0755); err != nil {
                return err
        }
-       if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), 
m.Data); err != nil {
-               // when we are in UserNS but CgroupNS is not unshared, we 
cannot mount cgroup2 (#2158)
-               if err == unix.EPERM || err == unix.EBUSY {
-                       src := fs2.UnifiedMountpoint
-                       if c.cgroupns && c.cgroup2Path != "" {
-                               // Emulate cgroupns by bind-mounting
-                               // the container cgroup path rather than
-                               // the whole /sys/fs/cgroup.
-                               src = c.cgroup2Path
-                       }
-                       err = unix.Mount(src, dest, "", 
uintptr(m.Flags)|unix.MS_BIND, "")
-                       if err == unix.ENOENT && c.rootlessCgroups {
-                               err = nil
+       return utils.WithProcfd(c.root, m.Destination, func(procfd string) 
error {
+               if err := unix.Mount(m.Source, procfd, "cgroup2", 
uintptr(m.Flags), m.Data); err != nil {
+                       // when we are in UserNS but CgroupNS is not unshared, 
we cannot mount cgroup2 (#2158)
+                       if err == unix.EPERM || err == unix.EBUSY {
+                               src := fs2.UnifiedMountpoint
+                               if c.cgroupns && c.cgroup2Path != "" {
+                                       // Emulate cgroupns by bind-mounting
+                                       // the container cgroup path rather than
+                                       // the whole /sys/fs/cgroup.
+                                       src = c.cgroup2Path
+                               }
+                               err = unix.Mount(src, procfd, "", 
uintptr(m.Flags)|unix.MS_BIND, "")
+                               if err == unix.ENOENT && c.rootlessCgroups {
+                                       err = nil
+                               }
                        }
                        return err
                }
+               return nil
+       })
+}
+
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
+       // Set up a scratch dir for the tmpfs on the host.
+       tmpdir, err := prepareTmp("/tmp")
+       if err != nil {
+               return newSystemErrorWithCause(err, "tmpcopyup: failed to setup 
tmpdir")
+       }
+       defer cleanupTmp(tmpdir)
+       tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
+       if err != nil {
+               return newSystemErrorWithCause(err, "tmpcopyup: failed to 
create tmpdir")
+       }
+       defer os.RemoveAll(tmpDir)
+
+       // Configure the *host* tmpdir as if it's the container mount. We change
+       // m.Destination since we are going to mount *on the host*.
+       oldDest := m.Destination
+       m.Destination = tmpDir
+       err = mountPropagate(m, "/", mountLabel)
+       m.Destination = oldDest
+       if err != nil {
                return err
        }
-       return nil
+       defer func() {
+               if Err != nil {
+                       if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != 
nil {
+                               logrus.Warnf("tmpcopyup: failed to unmount 
tmpdir on error: %v", err)
+                       }
+               }
+       }()
+
+       return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err 
error) {
+               // Copy the container data to the host tmpdir. We append "/" to 
force
+               // CopyDirectory to resolve the symlink rather than trying to 
copy the
+               // symlink itself.
+               if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != 
nil {
+                       return fmt.Errorf("tmpcopyup: failed to copy %s to %s 
(%s): %w", m.Destination, procfd, tmpDir, err)
+               }
+               // Now move the mount into the container.
+               if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err 
!= nil {
+                       return fmt.Errorf("tmpcopyup: failed to move mount %s 
to %s (%s): %w", tmpDir, procfd, m.Destination, err)
+               }
+               return nil
+       })
 }
 
 func mountToRootfs(m *configs.Mount, c *mountConfig) error {
        rootfs := c.root
        mountLabel := c.label
-       dest := m.Destination
-       if !strings.HasPrefix(dest, rootfs) {
-               dest = filepath.Join(rootfs, dest)
+       dest, err := securejoin.SecureJoin(rootfs, m.Destination)
+       if err != nil {
+               return err
        }
 
        switch m.Device {
@@ -364,53 +412,21 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) 
error {
                }
                return label.SetFileLabel(dest, mountLabel)
        case "tmpfs":
-               copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
-               tmpDir := ""
-               // dest might be an absolute symlink, so it needs
-               // to be resolved under rootfs.
-               dest, err := securejoin.SecureJoin(rootfs, m.Destination)
-               if err != nil {
-                       return err
-               }
-               m.Destination = dest
                stat, err := os.Stat(dest)
                if err != nil {
                        if err := os.MkdirAll(dest, 0755); err != nil {
                                return err
                        }
                }
-               if copyUp {
-                       tmpdir, err := prepareTmp("/tmp")
-                       if err != nil {
-                               return newSystemErrorWithCause(err, "tmpcopyup: 
failed to setup tmpdir")
-                       }
-                       defer cleanupTmp(tmpdir)
-                       tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
-                       if err != nil {
-                               return newSystemErrorWithCause(err, "tmpcopyup: 
failed to create tmpdir")
-                       }
-                       defer os.RemoveAll(tmpDir)
-                       m.Destination = tmpDir
+
+               if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
+                       err = doTmpfsCopyUp(m, rootfs, mountLabel)
+               } else {
+                       err = mountPropagate(m, rootfs, mountLabel)
                }
-               if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+               if err != nil {
                        return err
                }
-               if copyUp {
-                       if err := fileutils.CopyDirectory(dest, tmpDir); err != 
nil {
-                               errMsg := fmt.Errorf("tmpcopyup: failed to copy 
%s to %s: %v", dest, tmpDir, err)
-                               if err1 := unix.Unmount(tmpDir, 
unix.MNT_DETACH); err1 != nil {
-                                       return newSystemErrorWithCausef(err1, 
"tmpcopyup: %v: failed to unmount", errMsg)
-                               }
-                               return errMsg
-                       }
-                       if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, 
""); err != nil {
-                               errMsg := fmt.Errorf("tmpcopyup: failed to move 
mount %s to %s: %v", tmpDir, dest, err)
-                               if err1 := unix.Unmount(tmpDir, 
unix.MNT_DETACH); err1 != nil {
-                                       return newSystemErrorWithCausef(err1, 
"tmpcopyup: %v: failed to unmount", errMsg)
-                               }
-                               return errMsg
-                       }
-               }
                if stat != nil {
                        if err = os.Chmod(dest, stat.Mode()); err != nil {
                                return err
@@ -454,19 +470,9 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error 
{
                }
                return mountCgroupV1(m, c)
        default:
-               // ensure that the destination of the mount is resolved of 
symlinks at mount time because
-               // any previous mounts can invalidate the next mount's 
destination.
-               // this can happen when a user specifies mounts within other 
mounts to cause breakouts or other
-               // evil stuff to try to escape the container's rootfs.
-               var err error
-               if dest, err = securejoin.SecureJoin(rootfs, m.Destination); 
err != nil {
-                       return err
-               }
                if err := checkProcMount(rootfs, dest, m.Source); err != nil {
                        return err
                }
-               // update the mount with the correct dest after symlinks are 
resolved.
-               m.Destination = dest
                if err := os.MkdirAll(dest, 0755); err != nil {
                        return err
                }
@@ -649,7 +655,7 @@ func createDevices(config *configs.Config) error {
        return nil
 }
 
-func bindMountDeviceNode(dest string, node *devices.Device) error {
+func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
        f, err := os.Create(dest)
        if err != nil && !os.IsExist(err) {
                return err
@@ -657,7 +663,9 @@ func bindMountDeviceNode(dest string, node *devices.Device) error {
        if f != nil {
                f.Close()
        }
-       return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
+       return utils.WithProcfd(rootfs, dest, func(procfd string) error {
+               return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
+       })
 }
 
 // Creates the device node in the rootfs of the container.
@@ -666,18 +674,21 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
                // The node only exists for cgroup reasons, ignore it here.
                return nil
        }
-       dest := filepath.Join(rootfs, node.Path)
+       dest, err := securejoin.SecureJoin(rootfs, node.Path)
+       if err != nil {
+               return err
+       }
        if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
                return err
        }
        if bind {
-               return bindMountDeviceNode(dest, node)
+               return bindMountDeviceNode(rootfs, dest, node)
        }
        if err := mknodDevice(dest, node); err != nil {
                if os.IsExist(err) {
                        return nil
                } else if os.IsPermission(err) {
-                       return bindMountDeviceNode(dest, node)
+                       return bindMountDeviceNode(rootfs, dest, node)
                }
                return err
        }
@@ -1013,61 +1024,47 @@ func writeSystemProperty(key, value string) error {
 }
 
 func remount(m *configs.Mount, rootfs string) error {
-       var (
-               dest = m.Destination
-       )
-       if !strings.HasPrefix(dest, rootfs) {
-               dest = filepath.Join(rootfs, dest)
-       }
-       return unix.Mount(m.Source, dest, m.Device, 
uintptr(m.Flags|unix.MS_REMOUNT), "")
+       return utils.WithProcfd(rootfs, m.Destination, func(procfd string) 
error {
+               return unix.Mount(m.Source, procfd, m.Device, 
uintptr(m.Flags|unix.MS_REMOUNT), "")
+       })
 }
 
 // Do the mount operation followed by additional mounts required to take care
-// of propagation flags.
+// of propagation flags. This will always be scoped inside the container rootfs.
 func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
        var (
-               dest  = m.Destination
                data  = label.FormatMountLabel(m.Data, mountLabel)
                flags = m.Flags
        )
-       if libcontainerUtils.CleanPath(dest) == "/dev" {
-               flags &= ^unix.MS_RDONLY
-       }
-
-       // Mount it rw to allow chmod operation. A remount will be performed
-       // later to make it ro if set.
-       if m.Device == "tmpfs" {
+       // Delay mounting the filesystem read-only if we need to do further
+       // operations on it. We need to set up files in "/dev" and tmpfs mounts may
+       // need to be chmod-ed after mounting. The mount will be remounted ro later
+       // in finalizeRootfs() if necessary.
+       if libcontainerUtils.CleanPath(m.Destination) == "/dev" || m.Device == 
"tmpfs" {
                flags &= ^unix.MS_RDONLY
        }
 
-       copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
-       if !(copyUp || strings.HasPrefix(dest, rootfs)) {
-               dest = filepath.Join(rootfs, dest)
-       }
-
-       if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); 
err != nil {
-               return err
-       }
-
-       for _, pflag := range m.PropagationFlags {
-               if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != 
nil {
-                       return err
+       // Because the destination is inside a container path which might be
+       // mutating underneath us, we verify that we are actually going to mount
+       // inside the container with WithProcfd() -- mounting through a procfd
+       // mounts on the target.
+       if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) 
error {
+               return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), 
data)
+       }); err != nil {
+               return fmt.Errorf("mount through procfd: %w", err)
+       }
+       // We have to apply mount propagation flags in a separate WithProcfd() call
+       // because the previous call invalidates the passed procfd -- the mount
+       // target needs to be re-opened.
+       if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) 
error {
+               for _, pflag := range m.PropagationFlags {
+                       if err := unix.Mount("", procfd, "", uintptr(pflag), 
""); err != nil {
+                               return err
+                       }
                }
-       }
-       return nil
-}
-
-func mountNewCgroup(m *configs.Mount) error {
-       var (
-               data   = m.Data
-               source = m.Source
-       )
-       if data == "systemd" {
-               data = cgroups.CgroupNamePrefix + data
-               source = "systemd"
-       }
-       if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), 
data); err != nil {
-               return err
+               return nil
+       }); err != nil {
+               return fmt.Errorf("change mount propagation through procfd: 
%w", err)
        }
        return nil
 }
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
index 1b72b7a..cd78f23 100644
--- a/libcontainer/utils/utils.go
+++ b/libcontainer/utils/utils.go
@@ -3,12 +3,15 @@ package utils
 import (
        "encoding/binary"
        "encoding/json"
+       "fmt"
        "io"
        "os"
        "path/filepath"
+       "strconv"
        "strings"
        "unsafe"
 
+       "github.com/cyphar/filepath-securejoin"
        "golang.org/x/sys/unix"
 )
 
@@ -88,6 +91,57 @@ func CleanPath(path string) string {
        return filepath.Clean(path)
 }
 
+// stripRoot returns the passed path, stripping the root path if it was
+// (lexically) inside it. Note that both passed paths will always be treated
+// as absolute, and the returned path will also always be absolute. In
+// addition, the paths are cleaned before stripping the root.
+func stripRoot(root, path string) string {
+       // Make the paths clean and absolute.
+       root, path = CleanPath("/"+root), CleanPath("/"+path)
+       switch {
+       case path == root:
+               path = "/"
+       case root == "/":
+               // do nothing
+       case strings.HasPrefix(path, root+"/"):
+               path = strings.TrimPrefix(path, root+"/")
+       }
+       return CleanPath("/" + path)
+}
+
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
+// corresponding to the unsafePath resolved within the root. Before passing the
+// fd, this path is verified to have been inside the root -- so operating on it
+// through the passed fdpath should be safe. Do not access this path through
+// the original path strings, and do not attempt to use the pathname outside of
+// the passed closure (the file handle will be freed once the closure returns).
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
+       // Remove the root then forcefully resolve inside the root.
+       unsafePath = stripRoot(root, unsafePath)
+       path, err := securejoin.SecureJoin(root, unsafePath)
+       if err != nil {
+               return fmt.Errorf("resolving path inside rootfs failed: %v", 
err)
+       }
+
+       // Open the target path.
+       fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+       if err != nil {
+               return fmt.Errorf("open o_path procfd: %w", err)
+       }
+       defer fh.Close()
+
+       // Double-check the path is the one we expected.
+       procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
+       if realpath, err := os.Readlink(procfd); err != nil {
+               return fmt.Errorf("procfd verification failed: %w", err)
+       } else if realpath != path {
+               return fmt.Errorf("possibly malicious path detected -- refusing 
to operate on %s", realpath)
+       }
+
+       // Run the closure.
+       return fn(procfd)
+}
+
 // SearchLabels searches a list of key-value pairs for the provided key and
 // returns the corresponding value. The pairs must be separated with '='.
 func SearchLabels(labels []string, query string) string {

Reply via email to