diff --git a/daemon/container_operations.go b/daemon/container_operations.go index ac366c08d2f42..91a5dd62aa1d1 100644 --- a/daemon/container_operations.go +++ b/daemon/container_operations.go @@ -71,6 +71,10 @@ func (daemon *Daemon) buildSandboxOptions(cfg *config.Config, container *contain sboxOptions = append(sboxOptions, libnetwork.OptionDNSOptions(cfg.DNSOptions)) } + if len(container.HostConfig.Sysctls) > 0 { + sboxOptions = append(sboxOptions, libnetwork.OptionSysctls(container.HostConfig.Sysctls)) + } + for _, extraHost := range container.HostConfig.ExtraHosts { // allow IPv6 addresses in extra hosts; only split on first ":" if _, err := opts.ValidateExtraHost(extraHost); err != nil { diff --git a/integration/internal/container/ops.go b/integration/internal/container/ops.go index 6962032bad82f..b2d35ca8a7be0 100644 --- a/integration/internal/container/ops.go +++ b/integration/internal/container/ops.go @@ -1,6 +1,7 @@ package container import ( + "maps" "strings" "github.com/docker/docker/api/types/container" @@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) { } } +// WithSysctls sets sysctl options for the container +func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) { + return func(c *TestContainerConfig) { + c.HostConfig.Sysctls = maps.Clone(sysctls) + } +} + // WithExposedPorts sets the exposed ports of the container func WithExposedPorts(ports ...string) func(*TestContainerConfig) { return func(c *TestContainerConfig) { diff --git a/integration/networking/etchosts_test.go b/integration/networking/etchosts_test.go new file mode 100644 index 0000000000000..bab28d3659f93 --- /dev/null +++ b/integration/networking/etchosts_test.go @@ -0,0 +1,107 @@ +package networking + +import ( + "context" + "testing" + "time" + + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/testutil" + 
"github.com/docker/docker/testutil/daemon" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +// Check that the '/etc/hosts' file in a container is created according to +// whether the container supports IPv6. +// Regression test for https://github.com/moby/moby/issues/35954 +func TestEtcHostsIpv6(t *testing.T) { + skip.If(t, testEnv.DaemonInfo.OSType == "windows") + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t, + "--ipv6", + "--ip6tables", + "--experimental", + "--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64") + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + testcases := []struct { + name string + sysctls map[string]string + expIPv6Enabled bool + expEtcHosts string + }{ + { + // Create a container with no overrides, on the IPv6-enabled default bridge. + // Expect the container to have a working '::1' address, on the assumption + // the test host's kernel supports IPv6 - and for its '/etc/hosts' file to + // include IPv6 addresses. + name: "IPv6 enabled", + expIPv6Enabled: true, + expEtcHosts: `127.0.0.1 localhost +::1 localhost ip6-localhost ip6-loopback +fe00::0 ip6-localnet +ff00::0 ip6-mcastprefix +ff02::1 ip6-allnodes +ff02::2 ip6-allrouters +`, + }, + { + // Create a container in the same network, with IPv6 disabled. Expect '::1' + // not to be pingable, and no IPv6 addresses in its '/etc/hosts'. 
+ name: "IPv6 disabled", + sysctls: map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"}, + expIPv6Enabled: false, + expEtcHosts: "127.0.0.1\tlocalhost\n", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + ctrId := container.Run(ctx, t, c, + container.WithName("etchosts_"+sanitizeCtrName(t.Name())), + container.WithImage("busybox:latest"), + container.WithCmd("top"), + container.WithSysctls(tc.sysctls), + ) + defer func() { + c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true}) + }() + + runCmd := func(ctrId string, cmd []string, expExitCode int) string { + t.Helper() + execCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res, err := container.Exec(execCtx, c, ctrId, cmd) + assert.Check(t, is.Nil(err)) + assert.Check(t, is.Equal(res.ExitCode, expExitCode)) + return res.Stdout() + } + + // Check that IPv6 is/isn't enabled, as expected. + var expPingExitStatus int + if !tc.expIPv6Enabled { + expPingExitStatus = 1 + } + runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus) + + // Check the contents of /etc/hosts. + stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0) + // Append the container's own addresses/name to the expected hosts file content. 
+ inspect := container.Inspect(ctx, t, c, ctrId) + exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n" + if tc.expIPv6Enabled { + exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n" + } + assert.Check(t, is.Equal(stdout, exp)) + }) + } +} diff --git a/libnetwork/controller.go b/libnetwork/controller.go index bcbbdd06cacbf..5cf65d4795e16 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -64,6 +64,7 @@ import ( "github.com/docker/docker/libnetwork/drvregistry" "github.com/docker/docker/libnetwork/ipamapi" "github.com/docker/docker/libnetwork/netlabel" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/osl" "github.com/docker/docker/libnetwork/scope" "github.com/docker/docker/libnetwork/types" @@ -922,6 +923,11 @@ func (c *Controller) NewSandbox(containerID string, options ...SandboxOption) (_ sb.processOptions(options...) + // Set the initial value of ipv6Disabled based on whether the host has IPv6. This + // value will be used for host networking. For other types of networking, it will + // be updated once the container has been created and can be inspected. 
+ sb.ipv6Disabled = !netutils.IsV6Listenable() + c.mu.Lock() if sb.ingress && c.ingressSandbox != nil { c.mu.Unlock() diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index d9b95490d7ab0..de489ecad74f0 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -6,9 +6,9 @@ import ( "errors" "fmt" "net" - "sync" "github.com/containerd/log" + "github.com/docker/docker/libnetwork/netutils" "github.com/docker/docker/libnetwork/types" "github.com/ishidawataru/sctp" ) @@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont // skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1` // https://github.com/moby/moby/issues/42288 isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil - if !isV6Binding && !IsV6Listenable() { + if !isV6Binding && !netutils.IsV6Listenable() { continue } @@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error { return portmapper.Unmap(host) } - -var ( - v6ListenableCached bool - v6ListenableOnce sync.Once -) - -// IsV6Listenable returns true when `[::1]:0` is listenable. -// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. 
-func IsV6Listenable() bool { - v6ListenableOnce.Do(func() { - ln, err := net.Listen("tcp6", "[::1]:0") - if err != nil { - // When the kernel was booted with `ipv6.disable=1`, - // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" - // https://github.com/moby/moby/issues/42288 - log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err) - } else { - v6ListenableCached = true - ln.Close() - } - }) - return v6ListenableCached -} diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index fa53450868d84..8375c07df3eea 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) { } } - // Do not update hosts file with internal networks endpoint IP - if !n.ingress && n.Name() != libnGWNetwork { - var addresses []string - if ip := ep.getFirstInterfaceIPv4Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if ip := ep.getFirstInterfaceIPv6Address(); ip != nil { - addresses = append(addresses, ip.String()) - } - if err = sb.updateHostsFile(addresses); err != nil { - return err - } + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return err } if err = sb.updateDNS(n.enableIPv6); err != nil { return err @@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) { return ps, ok } -func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP { +// Return a list of this endpoint's addresses to add to '/etc/hosts'. 
+func (ep *Endpoint) getEtcHostsAddrs() []string { ep.mu.Lock() defer ep.mu.Unlock() - if ep.iface.addr != nil { - return ep.iface.addr.IP + // Do not update hosts file with internal network's endpoint IP + if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork { + return nil } - return nil -} - -func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP { - ep.mu.Lock() - defer ep.mu.Unlock() - + var addresses []string + if ep.iface.addr != nil { + addresses = append(addresses, ep.iface.addr.IP.String()) + } if ep.iface.addrv6 != nil { - return ep.iface.addrv6.IP + addresses = append(addresses, ep.iface.addrv6.IP.String()) } - - return nil + return addresses } // EndpointOptionGeneric function returns an option setter for a Generic option defined diff --git a/libnetwork/etchosts/etchosts.go b/libnetwork/etchosts/etchosts.go index 21e74922c2674..231971096bf5c 100644 --- a/libnetwork/etchosts/etchosts.go +++ b/libnetwork/etchosts/etchosts.go @@ -5,6 +5,7 @@ import ( "bytes" "fmt" "io" + "net/netip" "os" "regexp" "strings" @@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) { var ( // Default hosts config records slice - defaultContent = []Record{ + defaultContentIPv4 = []Record{ {Hosts: "localhost", IP: "127.0.0.1"}, + } + defaultContentIPv6 = []Record{ {Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"}, {Hosts: "ip6-localnet", IP: "fe00::0"}, {Hosts: "ip6-mcastprefix", IP: "ff00::0"}, @@ -71,6 +74,31 @@ func Drop(path string) { // IP, hostname, and domainname set main record leave empty for no master record // extraContent is an array of extra host records. func Build(path, IP, hostname, domainname string, extraContent []Record) error { + return build(path, IP, hostname, domainname, extraContent, true) +} + +// BuildNoIPv6 is the same as Build, but will not include IPv6 entries. 
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error { + if isIPv6(IP) { + IP = "" + } + + var ipv4ExtraContent []Record + for _, rec := range extraContent { + if !isIPv6(rec.IP) { + ipv4ExtraContent = append(ipv4ExtraContent, rec) + } + } + + return build(path, IP, hostname, domainname, ipv4ExtraContent, false) +} + +func isIPv6(s string) bool { + addr, err := netip.ParseAddr(s) + return err == nil && addr.Is6() +} + +func build(path, IP, hostname, domainname string, extraContent []Record, ipv6 bool) error { defer pathLock(path)() content := bytes.NewBuffer(nil) @@ -94,11 +122,18 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error { } } // Write defaultContent slice to buffer - for _, r := range defaultContent { + for _, r := range defaultContentIPv4 { if _, err := r.WriteTo(content); err != nil { return err } } + if ipv6 { + for _, r := range defaultContentIPv6 { + if _, err := r.WriteTo(content); err != nil { + return err + } + } + } // Write extra content from function arguments for _, r := range extraContent { if _, err := r.WriteTo(content); err != nil { diff --git a/libnetwork/etchosts/etchosts_test.go b/libnetwork/etchosts/etchosts_test.go index 4a89d67246bf1..ce94a1eda912f 100644 --- a/libnetwork/etchosts/etchosts_test.go +++ b/libnetwork/etchosts/etchosts_test.go @@ -4,9 +4,12 @@ import ( "bytes" "fmt" "os" + "path/filepath" "testing" "golang.org/x/sync/errgroup" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" ) func TestBuildDefault(t *testing.T) { @@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) { } } +func TestBuildNoIPv6(t *testing.T) { + d := t.TempDir() + filename := filepath.Join(d, "hosts") + + err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{ + { + Hosts: "another.example", + IP: "fdbb:c59c:d015::3", + }, + { + Hosts: "another.example", + IP: "10.11.12.13", + }, + }) + assert.NilError(t, err) + content, err := os.ReadFile(filename) + 
assert.NilError(t, err) + assert.Check(t, is.Equal(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n")) +} + func TestBuildHostnameDomainname(t *testing.T) { file, err := os.CreateTemp("", "") if err != nil { diff --git a/libnetwork/netutils/utils.go b/libnetwork/netutils/utils.go index c8af237d83640..4896d1099623b 100644 --- a/libnetwork/netutils/utils.go +++ b/libnetwork/netutils/utils.go @@ -3,6 +3,7 @@ package netutils import ( + "context" "crypto/rand" "encoding/hex" "errors" @@ -10,7 +11,9 @@ import ( "io" "net" "strings" + "sync" + "github.com/containerd/log" "github.com/docker/docker/libnetwork/types" ) @@ -144,3 +147,26 @@ func ReverseIP(IP string) string { return strings.Join(reverseIP, ".") } + +var ( + v6ListenableCached bool + v6ListenableOnce sync.Once +) + +// IsV6Listenable returns true when `[::1]:0` is listenable. +// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option. +func IsV6Listenable() bool { + v6ListenableOnce.Do(func() { + ln, err := net.Listen("tcp6", "[::1]:0") + if err != nil { + // When the kernel was booted with `ipv6.disable=1`, + // we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol" + // https://github.com/moby/moby/issues/42288 + log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err) + } else { + v6ListenableCached = true + ln.Close() + } + }) + return v6ListenableCached +} diff --git a/libnetwork/osl/interface_linux.go b/libnetwork/osl/interface_linux.go index 27e079d1b9335..4ca064bf9c9d6 100644 --- a/libnetwork/osl/interface_linux.go +++ b/libnetwork/osl/interface_linux.go @@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti n.iFaces = append(n.iFaces, i) n.mu.Unlock() - n.checkLoV6() - return nil } @@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error { } n.mu.Unlock() - // TODO(aker): This function will disable IPv6 on lo interface if the removed interface was 
the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function. - n.checkLoV6() return nil } @@ -368,6 +364,9 @@ func setInterfaceIP(nlh *netlink.Handle, iface netlink.Link, i *Interface) error func setInterfaceIPv6(nlh *netlink.Handle, iface netlink.Link, i *Interface) error { if i.AddressIPv6() == nil { + if err := setIPv6(i.ns.path, i.DstName(), false); err != nil { + log.G(context.TODO()).WithError(err).Debug("failed to disable IPv6") + } return nil } if err := checkRouteConflict(nlh, i.AddressIPv6(), netlink.FAMILY_V6); err != nil { diff --git a/libnetwork/osl/namespace_linux.go b/libnetwork/osl/namespace_linux.go index 1ea66b65cb863..1e52d198b5593 100644 --- a/libnetwork/osl/namespace_linux.go +++ b/libnetwork/osl/namespace_linux.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/libnetwork/osl/kernel" "github.com/docker/docker/libnetwork/types" "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" "golang.org/x/sys/unix" ) @@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) { if err != nil { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // In live-restore mode, IPV6 entries are getting cleaned up due to below code - // We should retain IPV6 configurations in live-restore mode when Docker Daemon - // comes back. 
It should work as it is on other cases - // As starting point, disable IPv6 on all interfaces - if !isRestore && !n.isDefault { - err = setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - } if err = n.loopbackUp(); err != nil { n.nlHandle.Close() @@ -256,12 +247,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) { log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err) } - // As starting point, disable IPv6 on all interfaces - err = setIPv6(n.path, "all", false) - if err != nil { - log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err) - } - if err = n.loopbackUp(); err != nil { n.nlHandle.Close() return nil, err @@ -330,7 +315,6 @@ type Namespace struct { nextIfIndex map[string]int isDefault bool nlHandle *netlink.Handle - loV6Enabled bool mu sync.Mutex } @@ -555,32 +539,15 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types. return nil } -// Checks whether IPv6 needs to be enabled/disabled on the loopback interface -func (n *Namespace) checkLoV6() { - var ( - enable = false - action = "disable" - ) - - n.mu.Lock() - for _, iface := range n.iFaces { - if iface.AddressIPv6() != nil { - enable = true - action = "enable" - break - } - } - n.mu.Unlock() - - if n.loV6Enabled == enable { - return - } - - if err := setIPv6(n.path, "lo", enable); err != nil { - log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err) +// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1' +// is assigned by the kernel if IPv6 is enabled). 
+func (n *Namespace) IPv6LoEnabled() bool { + iface, err := n.nlHandle.LinkByName("lo") + if err != nil { + return false } - - n.loV6Enabled = enable + addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6) + return err == nil && len(addrs) > 0 } // ApplyOSTweaks applies operating system specific knobs on the sandbox. diff --git a/libnetwork/sandbox.go b/libnetwork/sandbox.go index 81a0db14082a1..0479cff8cc8c8 100644 --- a/libnetwork/sandbox.go +++ b/libnetwork/sandbox.go @@ -52,9 +52,14 @@ type Sandbox struct { inDelete bool ingress bool ndotsSet bool - oslTypes []osl.SandboxType // slice of properties of this sandbox - loadBalancerNID string // NID that this SB is a load balancer for - mu sync.Mutex + // ipv6Disabled is initially set to match the host's IPv6 support. For non-Windows, + // once the container task has been created, before it is started, it is set + // according to whether the 'lo' interface has address '::1' (assigned by the + // kernel), and whether a sysctl option to disable IPv6 has been given. + ipv6Disabled bool + oslTypes []osl.SandboxType // slice of properties of this sandbox + loadBalancerNID string // NID that this SB is a load balancer for + mu sync.Mutex // This mutex is used to serialize service related operation for an endpoint // The lock is here because the endpoint is saved into the store so is not unique service sync.Mutex @@ -98,6 +103,7 @@ type containerConfig struct { useDefaultSandBox bool useExternalKey bool exposedPorts []types.TransportPort + sysctls map[string]string } // ID returns the ID of the sandbox. 
diff --git a/libnetwork/sandbox_dns_unix.go b/libnetwork/sandbox_dns_unix.go index e30f394057688..994b88a7dc3b6 100644 --- a/libnetwork/sandbox_dns_unix.go +++ b/libnetwork/sandbox_dns_unix.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "net" + "net/netip" "os" "path" "path/filepath" @@ -14,6 +15,7 @@ import ( "strings" "github.com/containerd/log" + "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/etchosts" "github.com/docker/docker/libnetwork/resolvconf" "github.com/docker/docker/libnetwork/types" @@ -27,6 +29,21 @@ const ( resolverIPSandbox = "127.0.0.11" ) +// ctrPrestartDNS is to be called after the container task has been created, +// before it is started. Sandbox.ipv6Disabled must be set according to the +// container's IPv6 support at this point. +func (sb *Sandbox) ctrPrestartDNS() error { + if err := sb.buildHostsFile(); err != nil { + return errdefs.System(err) + } + for _, ep := range sb.Endpoints() { + if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil { + return errdefs.System(err) + } + } + return nil +} + func (sb *Sandbox) startResolver(restore bool) { sb.resolverOnce.Do(func() { var err error @@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) { } func (sb *Sandbox) setupResolutionFiles() error { - if err := sb.buildHostsFile(); err != nil { + // Create a hosts file that can be mounted during container setup. For most + // networking modes (not host networking) it will be re-created before the + // container start, once its support for IPv6 is known. 
+ if sb.config.hostsPath == "" { + sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" + } + dir, _ := filepath.Split(sb.config.hostsPath) + if err := createBasePath(dir); err != nil { return err } - - if err := sb.updateParentHosts(); err != nil { + if err := sb.buildHostsFile(); err != nil { return err } @@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error { } func (sb *Sandbox) buildHostsFile() error { - if sb.config.hostsPath == "" { - sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts" - } - - dir, _ := filepath.Split(sb.config.hostsPath) - if err := createBasePath(dir); err != nil { - return err - } - // This is for the host mode networking if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 { // We are working under the assumption that the origin file option had been properly expressed by the upper layer @@ -101,7 +115,26 @@ func (sb *Sandbox) buildHostsFile() error { extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP}) } - return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent) + // sb.ipv6Disabled is initially set according to the host's support for IPv6. For + // host networking, when there are extraHosts, that is the final value. For other + // networking modes, after the container namespace has been created, it is set according + // to whether the container has IPv6 support, then the hosts file is rebuilt here. + // TODO(robmry) - for host networking... + // - if extraHosts were merged with the host's /etc/hosts, instead of generating a + // new file here, it wouldn't be necessary to detect the host's IPv6-ness in + // controller.NewSandbox() in order to get it right here. + // - It'd be less-surprising too, adding an extraHosts option currently causes the + // container's /etc/hosts to lose content that'd otherwise be inherited from the + // host's file. 
+ buildf := etchosts.Build + if sb.ipv6Disabled { + buildf = etchosts.BuildNoIPv6 + } + if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil { + return err + } + + return sb.updateParentHosts() } func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { @@ -135,6 +168,15 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error { } func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) { + if sb.ipv6Disabled { + var filtered []etchosts.Record + for _, rec := range recs { + if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() { + filtered = append(filtered, rec) + } + } + recs = filtered + } if err := etchosts.Add(sb.config.hostsPath, recs); err != nil { log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err) } @@ -157,6 +199,16 @@ func (sb *Sandbox) updateParentHosts() error { if pSb == nil { continue } + // TODO(robmry) - filter out IPv6 addresses here if "sb.ipv6Disabled || pSb.ipv6Disabled", but... + // - this is part of the implementation of '--link', which will be removed along + // with the rest of legacy networking. + // - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container, + // and that change will come along later. + // - I think this may be dead code: it's not possible to start a parent container with + // '--link child' unless the child has already started ("Error response from daemon: + // Cannot link to a non running container"). So, when the child starts and this method + // is called with updates for parents, the parents aren't running and GetSandbox() + // returns nil. 
if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil { return err } diff --git a/libnetwork/sandbox_linux.go b/libnetwork/sandbox_linux.go index fc3bbb344740f..700406f12f192 100644 --- a/libnetwork/sandbox_linux.go +++ b/libnetwork/sandbox_linux.go @@ -157,14 +157,35 @@ func (sb *Sandbox) SetKey(basePath string) error { } } + sb.ipv6Disabled = sb.calcIPv6Disabled(osSbox) + + // Finish configuration, based on the newly-discovered availability of IPv6 + // in the container. + if err := sb.ctrPrestartDNS(); err != nil { + return err + } + for _, ep := range sb.Endpoints() { if err = sb.populateNetworkResources(ep); err != nil { return err } } + return nil } +// Set Sandbox.ipv6Disabled to its sysctl'd value (which runC hasn't yet +// applied), else according to whether the container's 'lo' interface has +// an IPv6 address ('::1' is assigned by the kernel when IPv6 is enabled). +func (sb *Sandbox) calcIPv6Disabled(osSbox *osl.Namespace) bool { + for k, v := range sb.config.sysctls { + if k == "net.ipv6.conf.all.disable_ipv6" { + return v == "1" + } + } + return !osSbox.IPv6LoEnabled() +} + func (sb *Sandbox) releaseOSSbox() error { sb.mu.Lock() osSbox := sb.osSbox diff --git a/libnetwork/sandbox_options.go b/libnetwork/sandbox_options.go index 0d914512fa166..76013f03ac37f 100644 --- a/libnetwork/sandbox_options.go +++ b/libnetwork/sandbox_options.go @@ -1,6 +1,8 @@ package libnetwork import ( + "maps" + "github.com/docker/docker/libnetwork/netlabel" "github.com/docker/docker/libnetwork/osl" "github.com/docker/docker/libnetwork/types" @@ -94,6 +96,14 @@ func OptionDNSOptions(options []string) SandboxOption { } } +// OptionSysctls function returns an option setter for sysctls to +// be passed to container Create method. 
+func OptionSysctls(sysctls map[string]string) SandboxOption { + return func(sb *Sandbox) { + sb.config.sysctls = maps.Clone(sysctls) + } +} + // OptionUseDefaultSandbox function returns an option setter for using default sandbox // (host namespace) to be passed to container Create method. func OptionUseDefaultSandbox() SandboxOption {