diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index 97bc8e707afb6..ab5d5b59b34c8 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -23,7 +23,6 @@ import ( "github.com/docker/docker/oci/caps" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/rootless/specconv" - "github.com/docker/docker/pkg/stringid" volumemounts "github.com/docker/docker/volume/mounts" "github.com/moby/sys/mount" "github.com/moby/sys/mountinfo" @@ -61,28 +60,6 @@ func withRlimits(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Contain } } -// withLibnetwork sets the libnetwork hook -func withLibnetwork(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts { - return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error { - if c.Config.NetworkDisabled { - return nil - } - for _, ns := range s.Linux.Namespaces { - if ns.Type == specs.NetworkNamespace && ns.Path == "" { - if s.Hooks == nil { - s.Hooks = &specs.Hooks{} - } - shortNetCtlrID := stringid.TruncateID(daemon.netController.ID()) - s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{ - Path: filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"), - Args: []string{"libnetwork-setkey", "-exec-root=" + daemonCfg.GetExecRoot(), c.ID, shortNetCtlrID}, - }) - } - } - return nil - } -} - // withRootless sets the spec to the rootless configuration func withRootless(daemon *Daemon, daemonCfg *dconfig.Config) coci.SpecOpts { return func(_ context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error { @@ -1070,7 +1047,6 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c WithCapabilities(c), WithSeccomp(daemon, c), withMounts(daemon, daemonCfg, c, mounts), - withLibnetwork(daemon, &daemonCfg.Config, c), WithApparmor(c), WithSelinux(c), WithOOMScore(&c.HostConfig.OomScoreAdj), diff --git a/daemon/start.go b/daemon/start.go index 7e6690295de3b..516c069958a65 100644 --- a/daemon/start.go +++ 
b/daemon/start.go @@ -236,6 +236,10 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore } }() + if err := daemon.initializeCreatedTask(ctx, tsk, container, spec); err != nil { + return err + } + if err := tsk.Start(context.TODO()); err != nil { // passing ctx caused integration tests to be stuck in the cleanup phase return setExitCodeFromError(container.SetExitCode, err) } diff --git a/daemon/start_linux.go b/daemon/start_linux.go new file mode 100644 index 0000000000000..f4c0044dbfb80 --- /dev/null +++ b/daemon/start_linux.go @@ -0,0 +1,31 @@ +package daemon // import "github.com/docker/docker/daemon" + +import ( + "context" + "fmt" + + specs "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/docker/docker/container" + "github.com/docker/docker/errdefs" + "github.com/docker/docker/libcontainerd/types" + "github.com/docker/docker/oci" +) + +// initializeCreatedTask performs any initialization that needs to be done to +// prepare a freshly-created task to be started. +func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error { + if !container.Config.NetworkDisabled { + nspath, ok := oci.NamespacePath(spec, specs.NetworkNamespace) + if ok && nspath == "" { // the runtime has been instructed to create a new network namespace for tsk. 
+ sb, err := daemon.netController.GetSandbox(container.ID) + if err != nil { + return errdefs.System(err) + } + if err := sb.SetKey(fmt.Sprintf("/proc/%d/ns/net", tsk.Pid())); err != nil { + return errdefs.System(err) + } + } + } + return nil +} diff --git a/daemon/start_notlinux.go b/daemon/start_notlinux.go new file mode 100644 index 0000000000000..0170e38cca214 --- /dev/null +++ b/daemon/start_notlinux.go @@ -0,0 +1,17 @@ +//go:build !linux + +package daemon // import "github.com/docker/docker/daemon" + +import ( + "context" + + "github.com/docker/docker/container" + "github.com/docker/docker/libcontainerd/types" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// initializeCreatedTask performs any initialization that needs to be done to +// prepare a freshly-created task to be started. +func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error { + return nil +} diff --git a/integration/internal/container/ops.go b/integration/internal/container/ops.go index 6962032bad82f..b2d35ca8a7be0 100644 --- a/integration/internal/container/ops.go +++ b/integration/internal/container/ops.go @@ -1,6 +1,7 @@ package container import ( + "maps" "strings" "github.com/docker/docker/api/types/container" @@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) { } } +// WithSysctls sets sysctl options for the container +func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) { + return func(c *TestContainerConfig) { + c.HostConfig.Sysctls = maps.Clone(sysctls) + } +} + // WithExposedPorts sets the exposed ports of the container func WithExposedPorts(ports ...string) func(*TestContainerConfig) { return func(c *TestContainerConfig) { diff --git a/integration/networking/etchosts_test.go b/integration/networking/etchosts_test.go new file mode 100644 index 0000000000000..bab28d3659f93 --- /dev/null +++ b/integration/networking/etchosts_test.go @@ -0,0 
+1,107 @@ +package networking + +import ( + "context" + "testing" + "time" + + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/testutil" + "github.com/docker/docker/testutil/daemon" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +// Check that the '/etc/hosts' file in a container is created according to +// whether the container supports IPv6. +// Regression test for https://github.com/moby/moby/issues/35954 +func TestEtcHostsIpv6(t *testing.T) { + skip.If(t, testEnv.DaemonInfo.OSType == "windows") + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t, + "--ipv6", + "--ip6tables", + "--experimental", + "--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64") + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + testcases := []struct { + name string + sysctls map[string]string + expIPv6Enabled bool + expEtcHosts string + }{ + { + // Create a container with no overrides, on the IPv6-enabled default bridge. + // Expect the container to have a working '::1' address, on the assumption + // the test host's kernel supports IPv6 - and for its '/etc/hosts' file to + // include IPv6 addresses. + name: "IPv6 enabled", + expIPv6Enabled: true, + expEtcHosts: `127.0.0.1 localhost +::1 localhost ip6-localhost ip6-loopback +fe00::0 ip6-localnet +ff00::0 ip6-mcastprefix +ff02::1 ip6-allnodes +ff02::2 ip6-allrouters +`, + }, + { + // Create a container in the same network, with IPv6 disabled. Expect '::1' + // not to be pingable, and no IPv6 addresses in its '/etc/hosts'. 
+ name: "IPv6 disabled", + sysctls: map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"}, + expIPv6Enabled: false, + expEtcHosts: "127.0.0.1\tlocalhost\n", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + ctrId := container.Run(ctx, t, c, + container.WithName("etchosts_"+sanitizeCtrName(t.Name())), + container.WithImage("busybox:latest"), + container.WithCmd("top"), + container.WithSysctls(tc.sysctls), + ) + defer func() { + c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true}) + }() + + runCmd := func(ctrId string, cmd []string, expExitCode int) string { + t.Helper() + execCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res, err := container.Exec(execCtx, c, ctrId, cmd) + assert.Check(t, is.Nil(err)) + assert.Check(t, is.Equal(res.ExitCode, expExitCode)) + return res.Stdout() + } + + // Check that IPv6 is/isn't enabled, as expected. + var expPingExitStatus int + if !tc.expIPv6Enabled { + expPingExitStatus = 1 + } + runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus) + + // Check the contents of /etc/hosts. + stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0) + // Append the container's own addresses/name to the expected hosts file content. 
+ inspect := container.Inspect(ctx, t, c, ctrId) + exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n" + if tc.expIPv6Enabled { + exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n" + } + assert.Check(t, is.Equal(stdout, exp)) + }) + } +} diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index d9b95490d7ab0..de489ecad74f0 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -6,9 +6,9 @@ import ( "errors" "fmt" "net" - "sync" "github.com/containerd/log" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/types" "github.com/ishidawataru/sctp" ) @@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont // skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1` // https://github.com/moby/moby/issues/42288 isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil - if !isV6Binding && !IsV6Listenable() { + if !isV6Binding && !netutils.IsV6Listenable() { continue } @@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error { return portmapper.Unmap(host) } - -var ( - v6ListenableCached bool - v6ListenableOnce sync.Once -) - -// IsV6Listenable returns true when `[::1]:0` is listenable. -// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. 
-func IsV6Listenable() bool { - v6ListenableOnce.Do(func() { - ln, err := net.Listen("tcp6", "[::1]:0") - if err != nil { - // When the kernel was booted with `ipv6.disable=1`, - // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" - // https://github.com/moby/moby/issues/42288 - log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err) - } else { - v6ListenableCached = true - ln.Close() - } - }) - return v6ListenableCached -} diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index fa53450868d84..8375c07df3eea 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) { } } - // Do not update hosts file with internal networks endpoint IP - if !n.ingress && n.Name() != libnGWNetwork { - var addresses []string - if ip := ep.getFirstInterfaceIPv4Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if ip := ep.getFirstInterfaceIPv6Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if err = sb.updateHostsFile(addresses); err != nil { - return err - } + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return err } if err = sb.updateDNS(n.enableIPv6); err != nil { return err @@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) { return ps, ok } -func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP { +// Return a list of this endpoint's addresses to add to '/etc/hosts'. 
+func (ep *Endpoint) getEtcHostsAddrs() []string { ep.mu.Lock() defer ep.mu.Unlock() - if ep.iface.addr != nil { - return ep.iface.addr.IP + // Do not update hosts file with internal network's endpoint IP + if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork { + return nil } - return nil -} - -func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP { - ep.mu.Lock() - defer ep.mu.Unlock() - + var addresses []string + if ep.iface.addr != nil { + addresses = append(addresses, ep.iface.addr.IP.String()) + } if ep.iface.addrv6 != nil { - return ep.iface.addrv6.IP + addresses = append(addresses, ep.iface.addrv6.IP.String()) } - - return nil + return addresses } // EndpointOptionGeneric function returns an option setter for a Generic option defined diff --git a/libnetwork/etchosts/etchosts.go b/libnetwork/etchosts/etchosts.go index 21e74922c2674..7832ebe673ed2 100644 --- a/libnetwork/etchosts/etchosts.go +++ b/libnetwork/etchosts/etchosts.go @@ -5,6 +5,7 @@ import ( "bytes" "fmt" "io" + "net/netip" "os" "regexp" "strings" @@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) { var ( // Default hosts config records slice - defaultContent = []Record{ + defaultContentIPv4 = []Record{ {Hosts: "localhost", IP: "127.0.0.1"}, + } + defaultContentIPv6 = []Record{ {Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"}, {Hosts: "ip6-localnet", IP: "fe00::0"}, {Hosts: "ip6-mcastprefix", IP: "ff00::0"}, @@ -71,9 +74,34 @@ func Drop(path string) { // IP, hostname, and domainname set main record leave empty for no master record // extraContent is an array of extra host records. func Build(path, IP, hostname, domainname string, extraContent []Record) error { + return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent) +} + +// BuildNoIPv6 is the same as Build, but will not include IPv6 entries. 
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error { + if isIPv6(IP) { + IP = "" + } + + var ipv4ExtraContent []Record + for _, rec := range extraContent { + if !isIPv6(rec.IP) { + ipv4ExtraContent = append(ipv4ExtraContent, rec) + } + } + + return build(path, IP, hostname, domainname, defaultContentIPv4, ipv4ExtraContent) +} + +func isIPv6(s string) bool { + addr, err := netip.ParseAddr(s) + return err == nil && addr.Is6() +} + +func build(path, IP, hostname, domainname string, contents ...[]Record) error { defer pathLock(path)() - content := bytes.NewBuffer(nil) + buf := bytes.NewBuffer(nil) if IP != "" { // set main record var mainRec Record @@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error { if hostName, _, ok := strings.Cut(fqdn, "."); ok { mainRec.Hosts += " " + hostName } - if _, err := mainRec.WriteTo(content); err != nil { - return err - } - } - // Write defaultContent slice to buffer - for _, r := range defaultContent { - if _, err := r.WriteTo(content); err != nil { + if _, err := mainRec.WriteTo(buf); err != nil { return err } } - // Write extra content from function arguments - for _, r := range extraContent { - if _, err := r.WriteTo(content); err != nil { - return err + + // Write content from function arguments + for _, content := range contents { + for _, c := range content { + if _, err := c.WriteTo(buf); err != nil { + return err + } } } - return os.WriteFile(path, content.Bytes(), 0o644) + return os.WriteFile(path, buf.Bytes(), 0o644) } // Add adds an arbitrary number of Records to an already existing /etc/hosts file diff --git a/libnetwork/etchosts/etchosts_test.go b/libnetwork/etchosts/etchosts_test.go index 4a89d67246bf1..96bb10fb3c4d5 100644 --- a/libnetwork/etchosts/etchosts_test.go +++ b/libnetwork/etchosts/etchosts_test.go @@ -4,9 +4,12 @@ import ( "bytes" "fmt" "os" + "path/filepath" "testing" "golang.org/x/sync/errgroup" + "gotest.tools/v3/assert" + is 
"gotest.tools/v3/assert/cmp" ) func TestBuildDefault(t *testing.T) { @@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) { } } +func TestBuildNoIPv6(t *testing.T) { + d := t.TempDir() + filename := filepath.Join(d, "hosts") + + err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{ + { + Hosts: "another.example", + IP: "fdbb:c59c:d015::3", + }, + { + Hosts: "another.example", + IP: "10.11.12.13", + }, + }) + assert.NilError(t, err) + content, err := os.ReadFile(filename) + assert.NilError(t, err) + assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n")) +} + func TestBuildHostnameDomainname(t *testing.T) { file, err := os.CreateTemp("", "") if err != nil { diff --git a/libnetwork/netutils/utils.go b/libnetwork/netutils/utils.go index c8af237d83640..4896d1099623b 100644 --- a/libnetwork/netutils/utils.go +++ b/libnetwork/netutils/utils.go @@ -3,6 +3,7 @@ package netutils import ( + "context" "crypto/rand" "encoding/hex" "errors" @@ -10,7 +11,9 @@ import ( "io" "net" "strings" + "sync" + "github.com/containerd/log" "github.com/docker/docker/libnetwork/types" ) @@ -144,3 +147,26 @@ func ReverseIP(IP string) string { return strings.Join(reverseIP, ".") } + +var ( + v6ListenableCached bool + v6ListenableOnce sync.Once +) + +// IsV6Listenable returns true when `[::1]:0` is listenable. +// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. 
+func IsV6Listenable() bool { + v6ListenableOnce.Do(func() { + ln, err := net.Listen("tcp6", "[::1]:0") + if err != nil { + // When the kernel was booted with `ipv6.disable=1`, + // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" + // https://github.com/moby/moby/issues/42288 + log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err) + } else { + v6ListenableCached = true + ln.Close() + } + }) + return v6ListenableCached +} diff --git a/libnetwork/osl/interface_linux.go b/libnetwork/osl/interface_linux.go index 27e079d1b9335..e87efbaa3922d 100644 --- a/libnetwork/osl/interface_linux.go +++ b/libnetwork/osl/interface_linux.go @@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti n.iFaces = append(n.iFaces, i) n.mu.Unlock() - n.checkLoV6() - return nil } @@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error { } n.mu.Unlock() - // TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function. 
- n.checkLoV6() return nil } diff --git a/libnetwork/osl/namespace_linux.go b/libnetwork/osl/namespace_linux.go index 1ea66b65cb863..a00075cfa48a3 100644 --- a/libnetwork/osl/namespace_linux.go +++ b/libnetwork/osl/namespace_linux.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/libnetwork/osl/kernel" "github.com/docker/docker/libnetwork/types" "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" "golang.org/x/sys/unix" ) @@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) { if err != nil { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // In live-restore mode, IPV6 entries are getting cleaned up due to below code - // We should retain IPV6 configurations in live-restore mode when Docker Daemon - // comes back. It should work as it is on other cases - // As starting point, disable IPv6 on all interfaces - if !isRestore && !n.isDefault { - err = setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - } if err = n.loopbackUp(); err != nil { n.nlHandle.Close() @@ -226,7 +217,11 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) { } func mountNetworkNamespace(basePath string, lnPath string) error { - return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "") + err := syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "") + if err != nil { + return fmt.Errorf("bind-mount %s -> %s: %w", basePath, lnPath, err) + } + return nil } // GetSandboxForExternalKey returns sandbox object for the supplied path @@ -256,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // As starting point, disable IPv6 on all interfaces - err 
= setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - if err = n.loopbackUp(); err != nil { n.nlHandle.Close() return nil, err @@ -321,17 +310,18 @@ func createNamespaceFile(path string) error { // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more // can be added dynamically. type Namespace struct { - path string - iFaces []*Interface - gw net.IP - gwv6 net.IP - staticRoutes []*types.StaticRoute - neighbors []*neigh - nextIfIndex map[string]int - isDefault bool - nlHandle *netlink.Handle - loV6Enabled bool - mu sync.Mutex + path string + iFaces []*Interface + gw net.IP + gwv6 net.IP + staticRoutes []*types.StaticRoute + neighbors []*neigh + nextIfIndex map[string]int + isDefault bool + ipv6LoEnabledOnce sync.Once + ipv6LoEnabledCached bool + nlHandle *netlink.Handle + mu sync.Mutex } // Interfaces returns the collection of Interface previously added with the AddInterface @@ -555,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types. return nil } -// Checks whether IPv6 needs to be enabled/disabled on the loopback interface -func (n *Namespace) checkLoV6() { - var ( - enable = false - action = "disable" - ) - - n.mu.Lock() - for _, iface := range n.iFaces { - if iface.AddressIPv6() != nil { - enable = true - action = "enable" - break +// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1' +// is assigned by the kernel if IPv6 is enabled). +func (n *Namespace) IPv6LoEnabled() bool { + n.ipv6LoEnabledOnce.Do(func() { + // If anything goes wrong, assume no-IPv6. 
+ iface, err := n.nlHandle.LinkByName("lo") + if err != nil { + log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support") + return } - } - n.mu.Unlock() - - if n.loV6Enabled == enable { - return - } - - if err := setIPv6(n.path, "lo", enable); err != nil { - log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err) - } - - n.loV6Enabled = enable + addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6) + if err != nil { + log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support") + return + } + n.ipv6LoEnabledCached = len(addrs) > 0 + }) + return n.ipv6LoEnabledCached } // ApplyOSTweaks applies operating system specific knobs on the sandbox. diff --git a/libnetwork/sandbox_dns_unix.go b/libnetwork/sandbox_dns_unix.go index e30f394057688..40f1a7b46f7e7 100644 --- a/libnetwork/sandbox_dns_unix.go +++ b/libnetwork/sandbox_dns_unix.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "net" + "net/netip" "os" "path" "path/filepath" @@ -14,6 +15,7 @@ import ( "strings" "github.com/containerd/log" + "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/etchosts" "github.com/docker/docker/libnetwork/resolvconf" "github.com/docker/docker/libnetwork/types" @@ -27,6 +29,21 @@ const ( resolverIPSandbox = "127.0.0.11" ) +// finishInitDNS is to be called after the container namespace has been created, +// before the user process is started. The container's support for IPv6 can be +// determined at this point. 
+func (sb *Sandbox) finishInitDNS() error { + if err := sb.buildHostsFile(); err != nil { + return errdefs.System(err) + } + for _, ep := range sb.Endpoints() { + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return errdefs.System(err) + } + } + return nil +} + func (sb *Sandbox) startResolver(restore bool) { sb.resolverOnce.Do(func() { var err error @@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) { } func (sb *Sandbox) setupResolutionFiles() error { - if err := sb.buildHostsFile(); err != nil { + // Create a hosts file that can be mounted during container setup. For most + // networking modes (not host networking) it will be re-created before the + // container start, once its support for IPv6 is known. + if sb.config.hostsPath == "" { + sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" + } + dir, _ := filepath.Split(sb.config.hostsPath) + if err := createBasePath(dir); err != nil { return err } - - if err := sb.updateParentHosts(); err != nil { + if err := sb.buildHostsFile(); err != nil { return err } @@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error { } func (sb *Sandbox) buildHostsFile() error { - if sb.config.hostsPath == "" { - sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" - } - - dir, _ := filepath.Split(sb.config.hostsPath) - if err := createBasePath(dir); err != nil { - return err - } - // This is for the host mode networking if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 { // We are working under the assumption that the origin file option had been properly expressed by the upper layer @@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error { extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP}) } - return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent) + // Assume IPv6 support, unless it's definitely disabled. 
+ buildf := etchosts.Build + if en, ok := sb.ipv6Enabled(); ok && !en { + buildf = etchosts.BuildNoIPv6 + } + if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil { + return err + } + + return sb.updateParentHosts() } func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { @@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { } func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) { + // Assume IPv6 support, unless it's definitely disabled. + if en, ok := sb.ipv6Enabled(); ok && !en { + var filtered []etchosts.Record + for _, rec := range recs { + if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() { + filtered = append(filtered, rec) + } + } + recs = filtered + } if err := etchosts.Add(sb.config.hostsPath, recs); err != nil { log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err) } @@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error { if pSb == nil { continue } + // TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but... + // - this is part of the implementation of '--link', which will be removed along + // with the rest of legacy networking. + // - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container, + // and that change will come along later. + // - I think this may be dead code, it's not possible to start a parent container with + // '--link child' unless the child has already started ("Error response from daemon: + // Cannot link to a non running container"). So, when the child starts and this method + // is called with updates for parents, the parents aren't running and GetSandbox() + // returns nil.) 
if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil { return err } diff --git a/libnetwork/sandbox_linux.go b/libnetwork/sandbox_linux.go index fc3bbb344740f..21c43f755d695 100644 --- a/libnetwork/sandbox_linux.go +++ b/libnetwork/sandbox_linux.go @@ -7,6 +7,7 @@ import ( "time" "github.com/containerd/log" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/osl" "github.com/docker/docker/libnetwork/types" ) @@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error { } } + if err := sb.finishInitDNS(); err != nil { + return err + } + for _, ep := range sb.Endpoints() { if err = sb.populateNetworkResources(ep); err != nil { return err } } + return nil } +// IPv6 support can always be determined for host networking. For other network +// types it can only be determined once there's a container namespace to probe, +// return ok=false in that case. +func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) { + // For host networking, IPv6 support depends on the host. + if sb.config.useDefaultSandBox { + return netutils.IsV6Listenable(), true + } + + // For other network types, look at whether the container's loopback interface has an IPv6 address. + sb.mu.Lock() + osSbox := sb.osSbox + sb.mu.Unlock() + + if osSbox == nil { + return false, false + } + return osSbox.IPv6LoEnabled(), true +} + func (sb *Sandbox) releaseOSSbox() error { sb.mu.Lock() osSbox := sb.osSbox diff --git a/oci/namespaces.go b/oci/namespaces.go index 851edd61ef240..befcefcc40145 100644 --- a/oci/namespaces.go +++ b/oci/namespaces.go @@ -14,3 +14,14 @@ func RemoveNamespace(s *specs.Spec, nsType specs.LinuxNamespaceType) { } } } + +// NamespacePath returns the configured Path of the first namespace in +// s.Linux.Namespaces of type nsType. 
+//
+// NamespacePath reports ok=false, rather than panicking, when s or s.Linux is
+// nil (for example, a spec that has no Linux section at all).
+func NamespacePath(s *specs.Spec, nsType specs.LinuxNamespaceType) (path string, ok bool) {
+	// specs.Spec.Linux is an optional pointer; without it there are no
+	// namespaces to search.
+	if s == nil || s.Linux == nil {
+		return "", false
+	}
+	for _, n := range s.Linux.Namespaces {
+		if n.Type == nsType {
+			// Path may be empty here: the entry exists but has no path set.
+			// Callers distinguish that case from "not configured" via ok.
+			return n.Path, true
+		}
+	}
+	return "", false
+}