From 7ed7e6caf6c1605ba5fcc80c015b4afced7c8bfd Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 14 Mar 2024 13:49:35 +0900 Subject: [PATCH 1/2] plugin: fix mounting /etc/hosts when running in UserNS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix `error mounting "/etc/hosts" to rootfs at "/etc/hosts": mount /etc/hosts:/etc/hosts (via /proc/self/fd/6), flags: 0x5021: operation not permitted`. This error was introduced in 7d08d84b039d2f4661a2242e765a141e65943920 (`dockerd-rootless.sh: set rootlesskit --state-dir=DIR`) that changed the filesystem of the state dir from /tmp to /run (in a typical setup). Fix issue 47248 Signed-off-by: Akihiro Suda (cherry picked from commit 762ec4b60ce1b337e64bc103d0166ed9b6bf1e99) Signed-off-by: Paweł Gronowski --- plugin/v2/plugin_linux.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/plugin/v2/plugin_linux.go b/plugin/v2/plugin_linux.go index 82f973ffc9d95..746afde8d5e67 100644 --- a/plugin/v2/plugin_linux.go +++ b/plugin/v2/plugin_linux.go @@ -1,3 +1,6 @@ +// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16: +//go:build go1.19 + package v2 // import "github.com/docker/docker/plugin/v2" import ( @@ -6,7 +9,10 @@ import ( "runtime" "strings" + "github.com/containerd/containerd/pkg/userns" "github.com/docker/docker/api/types" + "github.com/docker/docker/internal/rootless/mountopts" + "github.com/docker/docker/internal/sliceutil" "github.com/docker/docker/oci" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" @@ -136,5 +142,35 @@ func (p *Plugin) InitSpec(execRoot string) (*specs.Spec, error) { p.modifyRuntimeSpec(&s) } + // Rootless mode requires modifying the mount flags + // https://github.com/moby/moby/issues/47248#issuecomment-1927776700 + // https://github.com/moby/moby/pull/47558 + if userns.RunningInUserNS() { + for i := range 
s.Mounts { + m := &s.Mounts[i] + for _, o := range m.Options { + switch o { + case "bind", "rbind": + if _, err := os.Lstat(m.Source); err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + return nil, err + } + // UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given + // path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that + // bind-mounting "with options" will not fail with user namespaces, due to + // kernel restrictions that require user namespace mounts to preserve + // CL_UNPRIVILEGED locked flags. + unpriv, err := mountopts.UnprivilegedMountFlags(m.Source) + if err != nil { + return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", m) + } + m.Options = sliceutil.Dedup(append(m.Options, unpriv...)) + } + } + } + } + return &s, nil } From 4be97233cc191bda476d8ecfcd6ee48446ddb3da Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 15 Mar 2024 12:55:09 +0100 Subject: [PATCH 2/2] daemon: move getUnprivilegedMountFlags to internal package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code is currently only used in the daemon, but is also needed in other places. We should consider moving this code to github.com/moby/sys, so that BuildKit can also use the same implementation instead of maintaining a fork; moving it to internal allows us to reuse this code inside the repository, but does not allow external consumers to depend on it (which we don't want as it's not a permanent location). As our code only uses this in linux files, I did not add a stub for other platforms (but we may decide to do that in the moby/sys repository). 
Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 7b414f5703f1e4755a9ee7765b87eac2a2f1e0da) Signed-off-by: Paweł Gronowski --- daemon/oci_linux.go | 36 +---------------- .../rootless/mountopts/mountopts_linux.go | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+), 34 deletions(-) create mode 100644 internal/rootless/mountopts/mountopts_linux.go diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index c7fdedcb31f10..a865dc9398fce 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -19,6 +19,7 @@ import ( "github.com/docker/docker/container" dconfig "github.com/docker/docker/daemon/config" "github.com/docker/docker/errdefs" + "github.com/docker/docker/internal/rootless/mountopts" "github.com/docker/docker/oci" "github.com/docker/docker/oci/caps" "github.com/docker/docker/pkg/idtools" @@ -31,7 +32,6 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" - "golang.org/x/sys/unix" ) const inContainerInitPath = "/sbin/" + dconfig.DefaultInitBinary @@ -468,38 +468,6 @@ func ensureSharedOrSlave(path string) error { return nil } -// Get the set of mount flags that are set on the mount that contains the given -// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that -// bind-mounting "with options" will not fail with user namespaces, due to -// kernel restrictions that require user namespace mounts to preserve -// CL_UNPRIVILEGED locked flags. -func getUnprivilegedMountFlags(path string) ([]string, error) { - var statfs unix.Statfs_t - if err := unix.Statfs(path, &statfs); err != nil { - return nil, err - } - - // The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048. 
- unprivilegedFlags := map[uint64]string{ - unix.MS_RDONLY: "ro", - unix.MS_NODEV: "nodev", - unix.MS_NOEXEC: "noexec", - unix.MS_NOSUID: "nosuid", - unix.MS_NOATIME: "noatime", - unix.MS_RELATIME: "relatime", - unix.MS_NODIRATIME: "nodiratime", - } - - var flags []string - for mask, flag := range unprivilegedFlags { - if uint64(statfs.Flags)&mask == mask { - flags = append(flags, flag) - } - } - - return flags, nil -} - var ( mountPropagationMap = map[string]int{ "private": mount.PRIVATE, @@ -723,7 +691,7 @@ func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container) // when runc sets up the root filesystem, it is already inside a user // namespace, and thus cannot change any flags that are locked. if daemonCfg.RemappedRoot != "" || userns.RunningInUserNS() { - unprivOpts, err := getUnprivilegedMountFlags(m.Source) + unprivOpts, err := mountopts.UnprivilegedMountFlags(m.Source) if err != nil { return err } diff --git a/internal/rootless/mountopts/mountopts_linux.go b/internal/rootless/mountopts/mountopts_linux.go new file mode 100644 index 0000000000000..f4ecf710c8983 --- /dev/null +++ b/internal/rootless/mountopts/mountopts_linux.go @@ -0,0 +1,39 @@ +package mountopts + +import ( + "golang.org/x/sys/unix" +) + +// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given +// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that +// bind-mounting "with options" will not fail with user namespaces, due to +// kernel restrictions that require user namespace mounts to preserve +// CL_UNPRIVILEGED locked flags. 
+// +// TODO: Move to github.com/moby/sys/mount, and update BuildKit copy of this code as well (https://github.com/moby/buildkit/blob/v0.13.0/util/rootless/mountopts/mountopts_linux.go#L11-L18) +func UnprivilegedMountFlags(path string) ([]string, error) { + var statfs unix.Statfs_t + if err := unix.Statfs(path, &statfs); err != nil { + return nil, err + } + + // The set of keys comes from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048. + unprivilegedFlags := map[uint64]string{ + unix.MS_RDONLY: "ro", + unix.MS_NODEV: "nodev", + unix.MS_NOEXEC: "noexec", + unix.MS_NOSUID: "nosuid", + unix.MS_NOATIME: "noatime", + unix.MS_RELATIME: "relatime", + unix.MS_NODIRATIME: "nodiratime", + } + + var flags []string + for mask, flag := range unprivilegedFlags { + if uint64(statfs.Flags)&mask == mask { + flags = append(flags, flag) + } + } + + return flags, nil +}