From 0046b16d87105d334b72f5e98efd28bbd94d9659 Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Tue, 1 Nov 2022 15:21:37 -0400 Subject: [PATCH 1/2] daemon: set libnetwork sandbox key w/o OCI hook Signed-off-by: Cory Snider --- daemon/oci_linux.go | 24 ------------------------ daemon/start.go | 4 ++++ daemon/start_linux.go | 31 +++++++++++++++++++++++++++++++ daemon/start_notlinux.go | 17 +++++++++++++++++ libnetwork/osl/namespace_linux.go | 6 +++++- oci/namespaces.go | 11 +++++++++++ 6 files changed, 68 insertions(+), 25 deletions(-) create mode 100644 daemon/start_linux.go create mode 100644 daemon/start_notlinux.go diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index 97bc8e707afb6..ab5d5b59b34c8 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -23,7 +23,6 @@ import ( "github.com/docker/docker/oci/caps" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/rootless/specconv" - "github.com/docker/docker/pkg/stringid" volumemounts "github.com/docker/docker/volume/mounts" "github.com/moby/sys/mount" "github.com/moby/sys/mountinfo" @@ -61,28 +60,6 @@ func withRlimits(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Contain } } -// withLibnetwork sets the libnetwork hook -func withLibnetwork(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts { - return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error { - if c.Config.NetworkDisabled { - return nil - } - for _, ns := range s.Linux.Namespaces { - if ns.Type == specs.NetworkNamespace && ns.Path == "" { - if s.Hooks == nil { - s.Hooks = &specs.Hooks{} - } - shortNetCtlrID := stringid.TruncateID(daemon.netController.ID()) - s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{ - Path: filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"), - Args: []string{"libnetwork-setkey", "-exec-root=" + daemonCfg.GetExecRoot(), c.ID, shortNetCtlrID}, - }) - } - } - return nil - } -} - // withRootless sets the spec to the 
rootless configuration func withRootless(daemon *Daemon, daemonCfg *dconfig.Config) coci.SpecOpts { return func(_ context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error { @@ -1070,7 +1047,6 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c WithCapabilities(c), WithSeccomp(daemon, c), withMounts(daemon, daemonCfg, c, mounts), - withLibnetwork(daemon, &daemonCfg.Config, c), WithApparmor(c), WithSelinux(c), WithOOMScore(&c.HostConfig.OomScoreAdj), diff --git a/daemon/start.go b/daemon/start.go index 7e6690295de3b..516c069958a65 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -236,6 +236,10 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore } }() + if err := daemon.initializeCreatedTask(ctx, tsk, container, spec); err != nil { + return err + } + if err := tsk.Start(context.TODO()); err != nil { // passing ctx caused integration tests to be stuck in the cleanup phase return setExitCodeFromError(container.SetExitCode, err) } diff --git a/daemon/start_linux.go b/daemon/start_linux.go new file mode 100644 index 0000000000000..f4c0044dbfb80 --- /dev/null +++ b/daemon/start_linux.go @@ -0,0 +1,31 @@ +package daemon // import "github.com/docker/docker/daemon" + +import ( + "context" + "fmt" + + specs "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/docker/docker/container" + "github.com/docker/docker/errdefs" + "github.com/docker/docker/libcontainerd/types" + "github.com/docker/docker/oci" +) + +// initializeCreatedTask performs any initialization that needs to be done to +// prepare a freshly-created task to be started. +func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error { + if !container.Config.NetworkDisabled { + nspath, ok := oci.NamespacePath(spec, specs.NetworkNamespace) + if ok && nspath == "" { // the runtime has been instructed to create a new network namespace for tsk. 
+ sb, err := daemon.netController.GetSandbox(container.ID) + if err != nil { + return errdefs.System(err) + } + if err := sb.SetKey(fmt.Sprintf("/proc/%d/ns/net", tsk.Pid())); err != nil { + return errdefs.System(err) + } + } + } + return nil +} diff --git a/daemon/start_notlinux.go b/daemon/start_notlinux.go new file mode 100644 index 0000000000000..0170e38cca214 --- /dev/null +++ b/daemon/start_notlinux.go @@ -0,0 +1,17 @@ +//go:build !linux + +package daemon // import "github.com/docker/docker/daemon" + +import ( + "context" + + "github.com/docker/docker/container" + "github.com/docker/docker/libcontainerd/types" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// initializeCreatedTask performs any initialization that needs to be done to +// prepare a freshly-created task to be started. +func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error { + return nil +} diff --git a/libnetwork/osl/namespace_linux.go b/libnetwork/osl/namespace_linux.go index 1ea66b65cb863..256dc0d3b7f6e 100644 --- a/libnetwork/osl/namespace_linux.go +++ b/libnetwork/osl/namespace_linux.go @@ -226,7 +226,11 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) { } func mountNetworkNamespace(basePath string, lnPath string) error { - return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "") + err := syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "") + if err != nil { + return fmt.Errorf("bind-mount %s -> %s: %w", basePath, lnPath, err) + } + return nil } // GetSandboxForExternalKey returns sandbox object for the supplied path diff --git a/oci/namespaces.go b/oci/namespaces.go index 851edd61ef240..befcefcc40145 100644 --- a/oci/namespaces.go +++ b/oci/namespaces.go @@ -14,3 +14,14 @@ func RemoveNamespace(s *specs.Spec, nsType specs.LinuxNamespaceType) { } } } + +// NamespacePath returns the configured Path of the first namespace in +// s.Linux.Namespaces of 
type nsType. +func NamespacePath(s *specs.Spec, nsType specs.LinuxNamespaceType) (path string, ok bool) { + for _, n := range s.Linux.Namespaces { + if n.Type == nsType { + return n.Path, true + } + } + return "", false +} From a8f7c5ee481d4a7c4a98b18ea4189dc2eea7ecf7 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Thu, 11 Jan 2024 16:44:58 +0000 Subject: [PATCH 2/2] Detect IPv6 support in containers. Some configuration in a container depends on whether it has support for IPv6 (including default entries for '::1' etc. in '/etc/hosts'). Before this change, the container's support for IPv6 was determined by whether it was connected to any IPv6-enabled networks. But that can change over time; it isn't a property of the container itself. So, instead, detect IPv6 support by looking for '::1' on the container's loopback interface. It will not be present if the kernel does not have IPv6 support, or the user has disabled it in new namespaces by other means. Once IPv6 support has been determined for the container, its '/etc/hosts' is re-generated accordingly. The daemon no longer disables IPv6 on all interfaces during initialisation. It now disables IPv6 only for interfaces that have not been assigned an IPv6 address. (But, even if IPv6 is disabled for the container using the sysctl 'net.ipv6.conf.all.disable_ipv6=1', interfaces connected to IPv6 networks still get IPv6 addresses that appear in the internal DNS. There's more to do!) 
Signed-off-by: Rob Murray --- integration/internal/container/ops.go | 8 ++ integration/networking/etchosts_test.go | 107 ++++++++++++++++++ .../drivers/bridge/port_mapping_linux.go | 27 +---- libnetwork/endpoint.go | 38 +++---- libnetwork/etchosts/etchosts.go | 53 ++++++--- libnetwork/etchosts/etchosts_test.go | 23 ++++ libnetwork/netutils/utils.go | 26 +++++ libnetwork/osl/interface_linux.go | 4 - libnetwork/osl/namespace_linux.go | 82 +++++--------- libnetwork/sandbox_dns_unix.go | 69 ++++++++--- libnetwork/sandbox_linux.go | 26 +++++ 11 files changed, 330 insertions(+), 133 deletions(-) create mode 100644 integration/networking/etchosts_test.go diff --git a/integration/internal/container/ops.go b/integration/internal/container/ops.go index 6962032bad82f..b2d35ca8a7be0 100644 --- a/integration/internal/container/ops.go +++ b/integration/internal/container/ops.go @@ -1,6 +1,7 @@ package container import ( + "maps" "strings" "github.com/docker/docker/api/types/container" @@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) { } } +// WithSysctls sets sysctl options for the container +func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) { + return func(c *TestContainerConfig) { + c.HostConfig.Sysctls = maps.Clone(sysctls) + } +} + // WithExposedPorts sets the exposed ports of the container func WithExposedPorts(ports ...string) func(*TestContainerConfig) { return func(c *TestContainerConfig) { diff --git a/integration/networking/etchosts_test.go b/integration/networking/etchosts_test.go new file mode 100644 index 0000000000000..bab28d3659f93 --- /dev/null +++ b/integration/networking/etchosts_test.go @@ -0,0 +1,107 @@ +package networking + +import ( + "context" + "testing" + "time" + + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/testutil" + "github.com/docker/docker/testutil/daemon" + "gotest.tools/v3/assert" + is 
"gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +// Check that the '/etc/hosts' file in a container is created according to +// whether the container supports IPv6. +// Regression test for https://github.com/moby/moby/issues/35954 +func TestEtcHostsIpv6(t *testing.T) { + skip.If(t, testEnv.DaemonInfo.OSType == "windows") + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t, + "--ipv6", + "--ip6tables", + "--experimental", + "--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64") + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + testcases := []struct { + name string + sysctls map[string]string + expIPv6Enabled bool + expEtcHosts string + }{ + { + // Create a container with no overrides, on the IPv6-enabled default bridge. + // Expect the container to have a working '::1' address, on the assumption + // the test host's kernel supports IPv6 - and for its '/etc/hosts' file to + // include IPv6 addresses. + name: "IPv6 enabled", + expIPv6Enabled: true, + expEtcHosts: `127.0.0.1 localhost +::1 localhost ip6-localhost ip6-loopback +fe00::0 ip6-localnet +ff00::0 ip6-mcastprefix +ff02::1 ip6-allnodes +ff02::2 ip6-allrouters +`, + }, + { + // Create a container in the same network, with IPv6 disabled. Expect '::1' + // not to be pingable, and no IPv6 addresses in its '/etc/hosts'. 
+ name: "IPv6 disabled", + sysctls: map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"}, + expIPv6Enabled: false, + expEtcHosts: "127.0.0.1\tlocalhost\n", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + ctrId := container.Run(ctx, t, c, + container.WithName("etchosts_"+sanitizeCtrName(t.Name())), + container.WithImage("busybox:latest"), + container.WithCmd("top"), + container.WithSysctls(tc.sysctls), + ) + defer func() { + c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true}) + }() + + runCmd := func(ctrId string, cmd []string, expExitCode int) string { + t.Helper() + execCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res, err := container.Exec(execCtx, c, ctrId, cmd) + assert.Check(t, is.Nil(err)) + assert.Check(t, is.Equal(res.ExitCode, expExitCode)) + return res.Stdout() + } + + // Check that IPv6 is/isn't enabled, as expected. + var expPingExitStatus int + if !tc.expIPv6Enabled { + expPingExitStatus = 1 + } + runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus) + + // Check the contents of /etc/hosts. + stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0) + // Append the container's own addresses/name to the expected hosts file content. 
+ inspect := container.Inspect(ctx, t, c, ctrId) + exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n" + if tc.expIPv6Enabled { + exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n" + } + assert.Check(t, is.Equal(stdout, exp)) + }) + } +} diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index d9b95490d7ab0..de489ecad74f0 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -6,9 +6,9 @@ import ( "errors" "fmt" "net" - "sync" "github.com/containerd/log" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/types" "github.com/ishidawataru/sctp" ) @@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont // skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1` // https://github.com/moby/moby/issues/42288 isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil - if !isV6Binding && !IsV6Listenable() { + if !isV6Binding && !netutils.IsV6Listenable() { continue } @@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error { return portmapper.Unmap(host) } - -var ( - v6ListenableCached bool - v6ListenableOnce sync.Once -) - -// IsV6Listenable returns true when `[::1]:0` is listenable. -// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. 
-func IsV6Listenable() bool { - v6ListenableOnce.Do(func() { - ln, err := net.Listen("tcp6", "[::1]:0") - if err != nil { - // When the kernel was booted with `ipv6.disable=1`, - // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" - // https://github.com/moby/moby/issues/42288 - log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err) - } else { - v6ListenableCached = true - ln.Close() - } - }) - return v6ListenableCached -} diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index fa53450868d84..8375c07df3eea 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) { } } - // Do not update hosts file with internal networks endpoint IP - if !n.ingress && n.Name() != libnGWNetwork { - var addresses []string - if ip := ep.getFirstInterfaceIPv4Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if ip := ep.getFirstInterfaceIPv6Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if err = sb.updateHostsFile(addresses); err != nil { - return err - } + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return err } if err = sb.updateDNS(n.enableIPv6); err != nil { return err @@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) { return ps, ok } -func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP { +// Return a list of this endpoint's addresses to add to '/etc/hosts'. 
+func (ep *Endpoint) getEtcHostsAddrs() []string { ep.mu.Lock() defer ep.mu.Unlock() - if ep.iface.addr != nil { - return ep.iface.addr.IP + // Do not update hosts file with internal network's endpoint IP + if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork { + return nil } - return nil -} - -func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP { - ep.mu.Lock() - defer ep.mu.Unlock() - + var addresses []string + if ep.iface.addr != nil { + addresses = append(addresses, ep.iface.addr.IP.String()) + } if ep.iface.addrv6 != nil { - return ep.iface.addrv6.IP + addresses = append(addresses, ep.iface.addrv6.IP.String()) } - - return nil + return addresses } // EndpointOptionGeneric function returns an option setter for a Generic option defined diff --git a/libnetwork/etchosts/etchosts.go b/libnetwork/etchosts/etchosts.go index 21e74922c2674..7832ebe673ed2 100644 --- a/libnetwork/etchosts/etchosts.go +++ b/libnetwork/etchosts/etchosts.go @@ -5,6 +5,7 @@ import ( "bytes" "fmt" "io" + "net/netip" "os" "regexp" "strings" @@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) { var ( // Default hosts config records slice - defaultContent = []Record{ + defaultContentIPv4 = []Record{ {Hosts: "localhost", IP: "127.0.0.1"}, + } + defaultContentIPv6 = []Record{ {Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"}, {Hosts: "ip6-localnet", IP: "fe00::0"}, {Hosts: "ip6-mcastprefix", IP: "ff00::0"}, @@ -71,9 +74,34 @@ func Drop(path string) { // IP, hostname, and domainname set main record leave empty for no master record // extraContent is an array of extra host records. func Build(path, IP, hostname, domainname string, extraContent []Record) error { + return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent) +} + +// BuildNoIPv6 is the same as Build, but will not include IPv6 entries. 
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error { + if isIPv6(IP) { + IP = "" + } + + var ipv4ExtraContent []Record + for _, rec := range extraContent { + if !isIPv6(rec.IP) { + ipv4ExtraContent = append(ipv4ExtraContent, rec) + } + } + + return build(path, IP, hostname, domainname, defaultContentIPv4, ipv4ExtraContent) +} + +func isIPv6(s string) bool { + addr, err := netip.ParseAddr(s) + return err == nil && addr.Is6() +} + +func build(path, IP, hostname, domainname string, contents ...[]Record) error { defer pathLock(path)() - content := bytes.NewBuffer(nil) + buf := bytes.NewBuffer(nil) if IP != "" { // set main record var mainRec Record @@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error { if hostName, _, ok := strings.Cut(fqdn, "."); ok { mainRec.Hosts += " " + hostName } - if _, err := mainRec.WriteTo(content); err != nil { - return err - } - } - // Write defaultContent slice to buffer - for _, r := range defaultContent { - if _, err := r.WriteTo(content); err != nil { + if _, err := mainRec.WriteTo(buf); err != nil { return err } } - // Write extra content from function arguments - for _, r := range extraContent { - if _, err := r.WriteTo(content); err != nil { - return err + + // Write content from function arguments + for _, content := range contents { + for _, c := range content { + if _, err := c.WriteTo(buf); err != nil { + return err + } } } - return os.WriteFile(path, content.Bytes(), 0o644) + return os.WriteFile(path, buf.Bytes(), 0o644) } // Add adds an arbitrary number of Records to an already existing /etc/hosts file diff --git a/libnetwork/etchosts/etchosts_test.go b/libnetwork/etchosts/etchosts_test.go index 4a89d67246bf1..96bb10fb3c4d5 100644 --- a/libnetwork/etchosts/etchosts_test.go +++ b/libnetwork/etchosts/etchosts_test.go @@ -4,9 +4,12 @@ import ( "bytes" "fmt" "os" + "path/filepath" "testing" "golang.org/x/sync/errgroup" + "gotest.tools/v3/assert" + is 
"gotest.tools/v3/assert/cmp" ) func TestBuildDefault(t *testing.T) { @@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) { } } +func TestBuildNoIPv6(t *testing.T) { + d := t.TempDir() + filename := filepath.Join(d, "hosts") + + err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{ + { + Hosts: "another.example", + IP: "fdbb:c59c:d015::3", + }, + { + Hosts: "another.example", + IP: "10.11.12.13", + }, + }) + assert.NilError(t, err) + content, err := os.ReadFile(filename) + assert.NilError(t, err) + assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n")) +} + func TestBuildHostnameDomainname(t *testing.T) { file, err := os.CreateTemp("", "") if err != nil { diff --git a/libnetwork/netutils/utils.go b/libnetwork/netutils/utils.go index c8af237d83640..4896d1099623b 100644 --- a/libnetwork/netutils/utils.go +++ b/libnetwork/netutils/utils.go @@ -3,6 +3,7 @@ package netutils import ( + "context" "crypto/rand" "encoding/hex" "errors" @@ -10,7 +11,9 @@ import ( "io" "net" "strings" + "sync" + "github.com/containerd/log" "github.com/docker/docker/libnetwork/types" ) @@ -144,3 +147,26 @@ func ReverseIP(IP string) string { return strings.Join(reverseIP, ".") } + +var ( + v6ListenableCached bool + v6ListenableOnce sync.Once +) + +// IsV6Listenable returns true when `[::1]:0` is listenable. +// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. 
+func IsV6Listenable() bool { + v6ListenableOnce.Do(func() { + ln, err := net.Listen("tcp6", "[::1]:0") + if err != nil { + // When the kernel was booted with `ipv6.disable=1`, + // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" + // https://github.com/moby/moby/issues/42288 + log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err) + } else { + v6ListenableCached = true + ln.Close() + } + }) + return v6ListenableCached +} diff --git a/libnetwork/osl/interface_linux.go b/libnetwork/osl/interface_linux.go index 27e079d1b9335..e87efbaa3922d 100644 --- a/libnetwork/osl/interface_linux.go +++ b/libnetwork/osl/interface_linux.go @@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti n.iFaces = append(n.iFaces, i) n.mu.Unlock() - n.checkLoV6() - return nil } @@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error { } n.mu.Unlock() - // TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function. 
- n.checkLoV6() return nil } diff --git a/libnetwork/osl/namespace_linux.go b/libnetwork/osl/namespace_linux.go index 256dc0d3b7f6e..a00075cfa48a3 100644 --- a/libnetwork/osl/namespace_linux.go +++ b/libnetwork/osl/namespace_linux.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/libnetwork/osl/kernel" "github.com/docker/docker/libnetwork/types" "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" "golang.org/x/sys/unix" ) @@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) { if err != nil { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // In live-restore mode, IPV6 entries are getting cleaned up due to below code - // We should retain IPV6 configurations in live-restore mode when Docker Daemon - // comes back. It should work as it is on other cases - // As starting point, disable IPv6 on all interfaces - if !isRestore && !n.isDefault { - err = setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - } if err = n.loopbackUp(); err != nil { n.nlHandle.Close() @@ -260,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // As starting point, disable IPv6 on all interfaces - err = setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - if err = n.loopbackUp(); err != nil { n.nlHandle.Close() return nil, err @@ -325,17 +310,18 @@ func createNamespaceFile(path string) error { // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more // can be added dynamically. 
type Namespace struct { - path string - iFaces []*Interface - gw net.IP - gwv6 net.IP - staticRoutes []*types.StaticRoute - neighbors []*neigh - nextIfIndex map[string]int - isDefault bool - nlHandle *netlink.Handle - loV6Enabled bool - mu sync.Mutex + path string + iFaces []*Interface + gw net.IP + gwv6 net.IP + staticRoutes []*types.StaticRoute + neighbors []*neigh + nextIfIndex map[string]int + isDefault bool + ipv6LoEnabledOnce sync.Once + ipv6LoEnabledCached bool + nlHandle *netlink.Handle + mu sync.Mutex } // Interfaces returns the collection of Interface previously added with the AddInterface @@ -559,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types. return nil } -// Checks whether IPv6 needs to be enabled/disabled on the loopback interface -func (n *Namespace) checkLoV6() { - var ( - enable = false - action = "disable" - ) - - n.mu.Lock() - for _, iface := range n.iFaces { - if iface.AddressIPv6() != nil { - enable = true - action = "enable" - break +// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1' +// is assigned by the kernel if IPv6 is enabled). +func (n *Namespace) IPv6LoEnabled() bool { + n.ipv6LoEnabledOnce.Do(func() { + // If anything goes wrong, assume no-IPv6. 
+ iface, err := n.nlHandle.LinkByName("lo") + if err != nil { + log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support") + return } - } - n.mu.Unlock() - - if n.loV6Enabled == enable { - return - } - - if err := setIPv6(n.path, "lo", enable); err != nil { - log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err) - } - - n.loV6Enabled = enable + addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6) + if err != nil { + log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support") + return + } + n.ipv6LoEnabledCached = len(addrs) > 0 + }) + return n.ipv6LoEnabledCached } // ApplyOSTweaks applies operating system specific knobs on the sandbox. diff --git a/libnetwork/sandbox_dns_unix.go b/libnetwork/sandbox_dns_unix.go index e30f394057688..40f1a7b46f7e7 100644 --- a/libnetwork/sandbox_dns_unix.go +++ b/libnetwork/sandbox_dns_unix.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "net" + "net/netip" "os" "path" "path/filepath" @@ -14,6 +15,7 @@ import ( "strings" "github.com/containerd/log" + "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/etchosts" "github.com/docker/docker/libnetwork/resolvconf" "github.com/docker/docker/libnetwork/types" @@ -27,6 +29,21 @@ const ( resolverIPSandbox = "127.0.0.11" ) +// finishInitDNS is to be called after the container namespace has been created, +// before the user process is started. The container's support for IPv6 can be +// determined at this point. 
+func (sb *Sandbox) finishInitDNS() error { + if err := sb.buildHostsFile(); err != nil { + return errdefs.System(err) + } + for _, ep := range sb.Endpoints() { + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return errdefs.System(err) + } + } + return nil +} + func (sb *Sandbox) startResolver(restore bool) { sb.resolverOnce.Do(func() { var err error @@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) { } func (sb *Sandbox) setupResolutionFiles() error { - if err := sb.buildHostsFile(); err != nil { + // Create a hosts file that can be mounted during container setup. For most + // networking modes (not host networking) it will be re-created before the + // container start, once its support for IPv6 is known. + if sb.config.hostsPath == "" { + sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" + } + dir, _ := filepath.Split(sb.config.hostsPath) + if err := createBasePath(dir); err != nil { return err } - - if err := sb.updateParentHosts(); err != nil { + if err := sb.buildHostsFile(); err != nil { return err } @@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error { } func (sb *Sandbox) buildHostsFile() error { - if sb.config.hostsPath == "" { - sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" - } - - dir, _ := filepath.Split(sb.config.hostsPath) - if err := createBasePath(dir); err != nil { - return err - } - // This is for the host mode networking if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 { // We are working under the assumption that the origin file option had been properly expressed by the upper layer @@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error { extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP}) } - return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent) + // Assume IPv6 support, unless it's definitely disabled. 
+ buildf := etchosts.Build + if en, ok := sb.ipv6Enabled(); ok && !en { + buildf = etchosts.BuildNoIPv6 + } + if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil { + return err + } + + return sb.updateParentHosts() } func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { @@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { } func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) { + // Assume IPv6 support, unless it's definitely disabled. + if en, ok := sb.ipv6Enabled(); ok && !en { + var filtered []etchosts.Record + for _, rec := range recs { + if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() { + filtered = append(filtered, rec) + } + } + recs = filtered + } if err := etchosts.Add(sb.config.hostsPath, recs); err != nil { log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err) } @@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error { if pSb == nil { continue } + // TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but... + // - this is part of the implementation of '--link', which will be removed along + // with the rest of legacy networking. + // - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container, + // and that change will come along later. + // - I think this may be dead code, it's not possible to start a parent container with + // '--link child' unless the child has already started ("Error response from daemon: + // Cannot link to a non running container"). So, when the child starts and this method + // is called with updates for parents, the parents aren't running and GetSandbox() + // returns nil.) 
if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil { return err } diff --git a/libnetwork/sandbox_linux.go b/libnetwork/sandbox_linux.go index fc3bbb344740f..21c43f755d695 100644 --- a/libnetwork/sandbox_linux.go +++ b/libnetwork/sandbox_linux.go @@ -7,6 +7,7 @@ import ( "time" "github.com/containerd/log" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/osl" "github.com/docker/docker/libnetwork/types" ) @@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error { } } + if err := sb.finishInitDNS(); err != nil { + return err + } + for _, ep := range sb.Endpoints() { if err = sb.populateNetworkResources(ep); err != nil { return err } } + return nil } +// IPv6 support can always be determined for host networking. For other network +// types it can only be determined once there's a container namespace to probe, +// return ok=false in that case. +func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) { + // For host networking, IPv6 support depends on the host. + if sb.config.useDefaultSandBox { + return netutils.IsV6Listenable(), true + } + + // For other network types, look at whether the container's loopback interface has an IPv6 address. + sb.mu.Lock() + osSbox := sb.osSbox + sb.mu.Unlock() + + if osSbox == nil { + return false, false + } + return osSbox.IPv6LoEnabled(), true +} + func (sb *Sandbox) releaseOSSbox() error { sb.mu.Lock() osSbox := sb.osSbox