Skip to content

Commit

Permalink
volumes: Implement subpath mount
Browse files Browse the repository at this point in the history
`VolumeOptions` now has a `SubPath` field which allows to specify a path
relative to the volume that should be mounted as a destination.

Symlinks are supported, but they cannot escape the base volume
directory.
A protection against the Time-of-check to Time-of-use is implemented for
Linux and Windows to make it not possible to replace the mount source
with a symlink that escapes the volume directory.

On Linux, all subpath components are opened with openat, relative to the
base volume directory and checked against the volume escape.
The final file descriptor is mounted from the /proc/self/fd/<fd> to a
temporary mount point owned by the daemon and then passed to the
underlying container runtime. Temporary mountpoint is removed after the
container is started.

On Windows, all components of the path are locked before the check, and
released once the path is already mounted.

Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
  • Loading branch information
vvoland committed Jun 2, 2023
1 parent e0228c8 commit 1765550
Show file tree
Hide file tree
Showing 20 changed files with 703 additions and 75 deletions.
4 changes: 4 additions & 0 deletions api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ definitions:
type: "object"
additionalProperties:
type: "string"
SubPath:
description: "Source path inside the volume. Must be relative without any back traversals."
type: "string"
example: "dir-inside-volume/subdirectory"
TmpfsOptions:
description: "Optional configuration for the `tmpfs` type."
type: "object"
Expand Down
1 change: 1 addition & 0 deletions api/types/mount/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type BindOptions struct {
type VolumeOptions struct {
NoCopy bool `json:",omitempty"`
Labels map[string]string `json:",omitempty"`
SubPath string `json:",omitempty"`
DriverConfig *Driver `json:",omitempty"`
}

Expand Down
4 changes: 3 additions & 1 deletion daemon/containerfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai
}
}()

mounts, err := daemon.setupMounts(container)
mounts, cleanup, err := daemon.setupMounts(container)
defer cleanup()

if err != nil {
return nil, err
}
Expand Down
13 changes: 8 additions & 5 deletions daemon/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ func inSlice(slice []string, s string) bool {
}

// withMounts sets the container's mounts
func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container) coci.SpecOpts {
func withMounts(daemon *Daemon, daemonCfg *configStore, cleanup *func() error, c *container.Container) coci.SpecOpts {
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) (err error) {
if err := daemon.setupContainerMountsRoot(c); err != nil {
return err
Expand All @@ -509,7 +509,8 @@ func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container)
return err
}

ms, err := daemon.setupMounts(c)
ms, clean, err := daemon.setupMounts(c)
*cleanup = clean
if err != nil {
return err
}
Expand Down Expand Up @@ -1019,7 +1020,7 @@ func WithUser(c *container.Container) coci.SpecOpts {
}
}

func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (retSpec *specs.Spec, err error) {
func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (retSpec *specs.Spec, cleanup func() error, err error) {
var (
opts []coci.SpecOpts
s = oci.DefaultSpec()
Expand All @@ -1034,7 +1035,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
WithNamespaces(daemon, c),
WithCapabilities(c),
WithSeccomp(daemon, c),
withMounts(daemon, daemonCfg, c),
withMounts(daemon, daemonCfg, &cleanup, c),
withLibnetwork(daemon, &daemonCfg.Config, c),
WithApparmor(c),
WithSelinux(c),
Expand Down Expand Up @@ -1078,11 +1079,13 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
snapshotKey = c.ID
}

return &s, coci.ApplyOpts(ctx, daemon.containerdCli, &containers.Container{
err = coci.ApplyOpts(ctx, daemon.containerdCli, &containers.Container{
ID: c.ID,
Snapshotter: snapshotter,
SnapshotKey: snapshotKey,
}, &s, opts...)

return &s, cleanup, err
}

func clearReadOnly(m *specs.Mount) {
Expand Down
30 changes: 22 additions & 8 deletions daemon/oci_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ func TestTmpfsDevShmNoDupMount(t *testing.T) {
d := setupFakeDaemon(t, c)
defer cleanupFakeContainer(c)

_, err := d.createSpec(context.TODO(), &configStore{}, c)
_, cleanup, err := d.createSpec(context.TODO(), &configStore{}, c)
assert.Check(t, err)
defer cleanup()
}

// TestIpcPrivateVsReadonly checks that in case of IpcMode: private
Expand All @@ -101,8 +102,9 @@ func TestIpcPrivateVsReadonly(t *testing.T) {
d := setupFakeDaemon(t, c)
defer cleanupFakeContainer(c)

s, err := d.createSpec(context.TODO(), &configStore{}, c)
s, cleanup, err := d.createSpec(context.TODO(), &configStore{}, c)
assert.Check(t, err)
defer cleanup()

// Find the /dev/shm mount in ms, check it does not have ro
for _, m := range s.Mounts {
Expand Down Expand Up @@ -131,8 +133,10 @@ func TestSysctlOverride(t *testing.T) {
defer cleanupFakeContainer(c)

// Ensure that the implicit sysctl is set correctly.
s, err := d.createSpec(context.TODO(), &configStore{}, c)
s, cleanup, err := d.createSpec(context.TODO(), &configStore{}, c)
assert.NilError(t, err)
defer cleanup()

assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
Expand All @@ -147,23 +151,29 @@ func TestSysctlOverride(t *testing.T) {
assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"

s, err = d.createSpec(context.TODO(), &configStore{}, c)
s, cleanup, err = d.createSpec(context.TODO(), &configStore{}, c)
assert.NilError(t, err)
defer cleanup()

assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])

// Ensure the ping_group_range is not set on a daemon with user-namespaces enabled
s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c)
s, cleanup, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c)
assert.NilError(t, err)
defer cleanup()

_, ok := s.Linux.Sysctl["net.ipv4.ping_group_range"]
assert.Assert(t, !ok)

// Ensure the ping_group_range is set on a container in "host" userns mode
// on a daemon with user-namespaces enabled
c.HostConfig.UsernsMode = "host"
s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c)
s, cleanup, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c)
assert.NilError(t, err)
defer cleanup()

assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
}

Expand All @@ -182,16 +192,20 @@ func TestSysctlOverrideHost(t *testing.T) {
defer cleanupFakeContainer(c)

// Ensure that the implicit sysctl is not set
s, err := d.createSpec(context.TODO(), &configStore{}, c)
s, cleanup, err := d.createSpec(context.TODO(), &configStore{}, c)
assert.NilError(t, err)
defer cleanup()

assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "")
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "")

// Set an explicit sysctl.
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"

s, err = d.createSpec(context.TODO(), &configStore{}, c)
s, cleanup, err = d.createSpec(context.TODO(), &configStore{}, c)
assert.NilError(t, err)
defer cleanup()

assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
}

Expand Down
34 changes: 18 additions & 16 deletions daemon/oci_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,42 +28,44 @@ const (
credentialSpecFileLocation = "CredentialSpecs"
)

func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (*specs.Spec, error) {
func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (*specs.Spec, func() error, error) {
cleanup := func() error { return nil }
img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), imagetypes.GetImageOpts{})
if err != nil {
return nil, err
return nil, cleanup, err
}
if !system.IsOSSupported(img.OperatingSystem()) {
return nil, system.ErrNotSupportedOperatingSystem
return nil, cleanup, system.ErrNotSupportedOperatingSystem
}

s := oci.DefaultSpec()

if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil {
return nil, err
return nil, cleanup, err
}

linkedEnv, err := daemon.setupLinkedContainers(c)
if err != nil {
return nil, err
return nil, cleanup, err
}

// Note, unlike Unix, we do NOT call into SetupWorkingDirectory as
// this is done in VMCompute. Further, we couldn't do it for Hyper-V
// containers anyway.

if err := daemon.setupSecretDir(c); err != nil {
return nil, err
return nil, cleanup, err
}

if err := daemon.setupConfigDir(c); err != nil {
return nil, err
return nil, cleanup, err
}

// In s.Mounts
mounts, err := daemon.setupMounts(c)
mounts, clean, err := daemon.setupMounts(c)
cleanup = clean
if err != nil {
return nil, err
return nil, cleanup, err
}

var isHyperV bool
Expand All @@ -90,21 +92,21 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
// except for Hyper-V containers, so mount it here in that case.
if isHyperV {
if err := daemon.Mount(c); err != nil {
return nil, err
return nil, cleanup, err
}
defer daemon.Unmount(c)
}
if err := c.CreateSecretSymlinks(); err != nil {
return nil, err
return nil, cleanup, err
}
if err := c.CreateConfigSymlinks(); err != nil {
return nil, err
return nil, cleanup, err
}
}

secretMounts, err := c.SecretMounts()
if err != nil {
return nil, err
return nil, cleanup, err
}
if secretMounts != nil {
mounts = append(mounts, secretMounts...)
Expand Down Expand Up @@ -140,7 +142,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
s.Process.User.Username = c.Config.User
s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer)
if err != nil {
return nil, errors.Wrapf(err, "container %s", c.ID)
return nil, cleanup, errors.Wrapf(err, "container %s", c.ID)
}

dnsSearch := daemon.getDNSSearchSettings(&daemonCfg.Config, c)
Expand Down Expand Up @@ -203,7 +205,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
}

if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil {
return nil, err
return nil, cleanup, err
}

if logrus.IsLevelEnabled(logrus.DebugLevel) {
Expand All @@ -212,7 +214,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
}
}

return &s, nil
return &s, cleanup, nil
}

// Sets the Windows-specific fields of the OCI spec
Expand Down
3 changes: 2 additions & 1 deletion daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,11 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
return err
}

spec, err := daemon.createSpec(ctx, daemonCfg, container)
spec, cleanup, err := daemon.createSpec(ctx, daemonCfg, container)
if err != nil {
return errdefs.System(err)
}
defer cleanup()

if resetRestartManager {
container.ResetRestartManager(true)
Expand Down
4 changes: 3 additions & 1 deletion daemon/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,11 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo
mp.Volume = &volumeWrapper{v: v, s: daemon.volumes}
mp.Name = v.Name
mp.Driver = v.Driver

// need to selinux-relabel local mounts
mp.Source = v.Mountpoint
if mp.Source != "" && cfg.VolumeOptions != nil && cfg.VolumeOptions.SubPath != "" {
mp.Source = filepath.Join(mp.Source, cfg.VolumeOptions.SubPath)
}
if mp.Driver == volume.DefaultDriverName {
setBindModeIfNull(mp)
}
Expand Down
39 changes: 29 additions & 10 deletions daemon/volumes_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,37 @@ import (
mounttypes "github.com/docker/docker/api/types/mount"
"github.com/docker/docker/container"
volumemounts "github.com/docker/docker/volume/mounts"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
)

// setupMounts iterates through each of the mount points for a container and
// calls Setup() on each. It also looks to see if is a network mount such as
// /etc/resolv.conf, and if it is not, appends it to the array of mounts.
func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, func() error, error) {
var cleanups []func() error
cleanup := func() error {
var allErr error
for _, c := range cleanups {
err := c()
if err != nil {
if allErr == nil {
allErr = err
} else {
allErr = multierror.Append(allErr, err)
}
}
}

return allErr
}

var mounts []container.Mount
// TODO: tmpfs mounts should be part of Mountpoints
tmpfsMounts := make(map[string]bool)
tmpfsMountInfo, err := c.TmpfsMounts()
if err != nil {
return nil, err
return nil, cleanup, err
}
for _, m := range tmpfsMountInfo {
tmpfsMounts[m.Destination] = true
Expand All @@ -34,7 +52,7 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
continue
}
if err := daemon.lazyInitializeVolume(c.ID, m); err != nil {
return nil, err
return nil, cleanup, err
}
// If the daemon is being shutdown, we should not let a container start if it is trying to
// mount the socket the daemon is listening on. During daemon shutdown, the socket
Expand All @@ -47,9 +65,10 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
return nil
}

path, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc)
path, clean, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc)
cleanups = append(cleanups, clean)
if err != nil {
return nil, err
return nil, cleanup, err
}
if !c.TrySetNetworkMount(m.Destination, path) {
mnt := container.Mount{
Expand All @@ -60,13 +79,13 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
}
if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil {
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyNonRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive")
return nil, cleanup, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive")
}
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive")
return nil, cleanup, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive")
}
if m.Spec.BindOptions.ReadOnlyNonRecursive && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive")
return nil, cleanup, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive")
}
mnt.NonRecursive = m.Spec.BindOptions.NonRecursive
mnt.ReadOnlyNonRecursive = m.Spec.BindOptions.ReadOnlyNonRecursive
Expand Down Expand Up @@ -98,11 +117,11 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
// up to the user to make sure the file has proper ownership for userns
if strings.Index(mnt.Source, daemon.repository) == 0 {
if err := os.Chown(mnt.Source, rootIDs.UID, rootIDs.GID); err != nil {
return nil, err
return nil, cleanup, err
}
}
}
return append(mounts, netMounts...), nil
return append(mounts, netMounts...), cleanup, nil
}

// sortMounts sorts an array of mounts in lexicographic order. This ensure that
Expand Down

0 comments on commit 1765550

Please sign in to comment.