Skip to content

Commit

Permalink
Merge pull request #47588 from vvoland/v25.0-47558
Browse files Browse the repository at this point in the history
[25.0 backport] plugin: fix mounting /etc/hosts when running in UserNS
  • Loading branch information
vvoland committed Mar 19, 2024
2 parents 2a0601e + 4be9723 commit 817bccb
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 34 deletions.
36 changes: 2 additions & 34 deletions daemon/oci_linux.go
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/docker/docker/container"
dconfig "github.com/docker/docker/daemon/config"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/internal/rootless/mountopts"
"github.com/docker/docker/oci"
"github.com/docker/docker/oci/caps"
"github.com/docker/docker/pkg/idtools"
Expand All @@ -31,7 +32,6 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

const inContainerInitPath = "/sbin/" + dconfig.DefaultInitBinary
Expand Down Expand Up @@ -468,38 +468,6 @@ func ensureSharedOrSlave(path string) error {
return nil
}

// Get the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
func getUnprivilegedMountFlags(path string) ([]string, error) {
var statfs unix.Statfs_t
if err := unix.Statfs(path, &statfs); err != nil {
return nil, err
}

// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
unprivilegedFlags := map[uint64]string{
unix.MS_RDONLY: "ro",
unix.MS_NODEV: "nodev",
unix.MS_NOEXEC: "noexec",
unix.MS_NOSUID: "nosuid",
unix.MS_NOATIME: "noatime",
unix.MS_RELATIME: "relatime",
unix.MS_NODIRATIME: "nodiratime",
}

var flags []string
for mask, flag := range unprivilegedFlags {
if uint64(statfs.Flags)&mask == mask {
flags = append(flags, flag)
}
}

return flags, nil
}

var (
mountPropagationMap = map[string]int{
"private": mount.PRIVATE,
Expand Down Expand Up @@ -723,7 +691,7 @@ func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container)
// when runc sets up the root filesystem, it is already inside a user
// namespace, and thus cannot change any flags that are locked.
if daemonCfg.RemappedRoot != "" || userns.RunningInUserNS() {
unprivOpts, err := getUnprivilegedMountFlags(m.Source)
unprivOpts, err := mountopts.UnprivilegedMountFlags(m.Source)
if err != nil {
return err
}
Expand Down
39 changes: 39 additions & 0 deletions internal/rootless/mountopts/mountopts_linux.go
@@ -0,0 +1,39 @@
package mountopts

import (
"golang.org/x/sys/unix"
)

// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
//
// TODO: Move to github.com/moby/sys/mount, and update BuildKit copy of this code as well (https://github.com/moby/buildkit/blob/v0.13.0/util/rootless/mountopts/mountopts_linux.go#L11-L18)
func UnprivilegedMountFlags(path string) ([]string, error) {
var statfs unix.Statfs_t
if err := unix.Statfs(path, &statfs); err != nil {
return nil, err
}

// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
unprivilegedFlags := map[uint64]string{
unix.MS_RDONLY: "ro",
unix.MS_NODEV: "nodev",
unix.MS_NOEXEC: "noexec",
unix.MS_NOSUID: "nosuid",
unix.MS_NOATIME: "noatime",
unix.MS_RELATIME: "relatime",
unix.MS_NODIRATIME: "nodiratime",
}

var flags []string
for mask, flag := range unprivilegedFlags {
if uint64(statfs.Flags)&mask == mask {
flags = append(flags, flag)
}
}

return flags, nil
}
36 changes: 36 additions & 0 deletions plugin/v2/plugin_linux.go
@@ -1,3 +1,6 @@
// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
//go:build go1.19

package v2 // import "github.com/docker/docker/plugin/v2"

import (
Expand All @@ -6,7 +9,10 @@ import (
"runtime"
"strings"

"github.com/containerd/containerd/pkg/userns"
"github.com/docker/docker/api/types"
"github.com/docker/docker/internal/rootless/mountopts"
"github.com/docker/docker/internal/sliceutil"
"github.com/docker/docker/oci"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
Expand Down Expand Up @@ -136,5 +142,35 @@ func (p *Plugin) InitSpec(execRoot string) (*specs.Spec, error) {
p.modifyRuntimeSpec(&s)
}

// Rootless mode requires modifying the mount flags
// https://github.com/moby/moby/issues/47248#issuecomment-1927776700
// https://github.com/moby/moby/pull/47558
if userns.RunningInUserNS() {
for i := range s.Mounts {
m := &s.Mounts[i]
for _, o := range m.Options {
switch o {
case "bind", "rbind":
if _, err := os.Lstat(m.Source); err != nil {
if errors.Is(err, os.ErrNotExist) {
continue
}
return nil, err
}
// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
unpriv, err := mountopts.UnprivilegedMountFlags(m.Source)
if err != nil {
return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", m)
}
m.Options = sliceutil.Dedup(append(m.Options, unpriv...))
}
}
}
}

return &s, nil
}

0 comments on commit 817bccb

Please sign in to comment.