Skip to content

Commit

Permalink
Detect IPv6 support in containers.
Browse files Browse the repository at this point in the history
Some configuration in a container depends on whether it has support for
IPv6 (including default entries for '::1' etc in '/etc/hosts').

Before this change, the container's support for IPv6 was determined by
whether it was connected to any IPv6-enabled networks. But that can
change over time; it isn't a property of the container itself.

So, instead, detect IPv6 support by looking for '::1' on the container's
loopback interface. It will not be present if the kernel does not have
IPv6 support, or the user has disabled it in new namespaces by other
means.

That detection happens in the container runtime's 'SetKey' prestart
hook, but runC calls that hook before applying sysctls. So, if the user
provides sysctl 'net.ipv6.conf.all.disable_ipv6=1' to explicitly disable
IPv6 when creating a container, treat that as an override and do not try
to detect support via the '::1' address.

Once IPv6 support has been determined for the container, its '/etc/hosts'
is re-generated accordingly.

The daemon no longer disables IPv6 on all interfaces during initialisation.
It now disables IPv6 only for interfaces that have not been assigned an
IPv6 address. (But even if IPv6 is disabled for the container using the
sysctl, interfaces connected to IPv6 networks still get IPv6 addresses
that appear in the internal DNS. There's more to do!)

Signed-off-by: Rob Murray <rob.murray@docker.com>
  • Loading branch information
robmry committed Jan 19, 2024
1 parent 4f9c865 commit bb89202
Show file tree
Hide file tree
Showing 16 changed files with 405 additions and 138 deletions.
3 changes: 3 additions & 0 deletions daemon/container_operations.go
Expand Up @@ -150,6 +150,8 @@ func (daemon *Daemon) buildSandboxOptions(cfg *config.Config, container *contain

sboxOptions = append(sboxOptions, libnetwork.OptionPortMapping(publishedPorts), libnetwork.OptionExposedPorts(exposedPorts))

sboxOptions = append(sboxOptions, libnetwork.OptionTwoPhaseInit())

// Legacy Link feature is supported only for the default bridge network.
// return if this call to build join options is not for default bridge network
// Legacy Link is only supported by docker run --link
Expand Down Expand Up @@ -207,6 +209,7 @@ func (daemon *Daemon) buildSandboxOptions(cfg *config.Config, container *contain
"ChildEndpoints": childEndpoints,
},
}))

return sboxOptions, nil
}

Expand Down
10 changes: 10 additions & 0 deletions daemon/start.go
Expand Up @@ -224,6 +224,16 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
}
}()

// Finish sandbox initialisation (if there's a libnetwork.Sandbox object for the
// new container, which there isn't when using container networking).
if sb, err := daemon.netController.GetSandbox(container.ID); err == nil {
if err := sb.PhaseTwoInit(); err != nil {
log.G(ctx).WithError(err).WithField("container", container.ID).
Error("failed to complete network configuration")
return errors.Wrap(err, "failed to complete network configuration")
}
}

if err := tsk.Start(context.TODO()); err != nil { // passing ctx caused integration tests to be stuck in the cleanup phase
return setExitCodeFromError(container.SetExitCode, err)
}
Expand Down
8 changes: 8 additions & 0 deletions integration/internal/container/ops.go
@@ -1,6 +1,7 @@
package container

import (
"maps"
"strings"

"github.com/docker/docker/api/types/container"
Expand Down Expand Up @@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) {
}
}

// WithSysctls returns an option that sets the container's sysctls.
//
// The map is cloned each time the option is applied, so the container's
// HostConfig never shares storage with the caller's map.
func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) {
	apply := func(cfg *TestContainerConfig) {
		cfg.HostConfig.Sysctls = maps.Clone(sysctls)
	}
	return apply
}

// WithExposedPorts sets the exposed ports of the container
func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
return func(c *TestContainerConfig) {
Expand Down
107 changes: 107 additions & 0 deletions integration/networking/etchosts_test.go
@@ -0,0 +1,107 @@
package networking

import (
"context"
"testing"
"time"

containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/testutil"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/skip"
)

// TestEtcHostsIpv6 checks that the '/etc/hosts' file in a container is created
// according to whether the container supports IPv6.
//
// For each test case it runs a container, pings '::1' inside it to confirm
// whether IPv6 is usable, then compares the container's '/etc/hosts' with the
// expected default entries followed by the container's own address(es).
//
// Regression test for https://github.com/moby/moby/issues/35954
func TestEtcHostsIpv6(t *testing.T) {
skip.If(t, testEnv.DaemonInfo.OSType == "windows")

ctx := setupTest(t)
// Start a daemon for this test with IPv6 enabled and a fixed IPv6 subnet,
// and stop it again when the test returns.
d := daemon.New(t)
d.StartWithBusybox(ctx, t,
"--ipv6",
"--ip6tables",
"--experimental",
"--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64")
defer d.Stop(t)

c := d.NewClientT(t)
defer c.Close()

testcases := []struct {
// name is used as the subtest name, and as part of the container name.
name string
// sysctls are passed to the container when it is created (nil for none).
sysctls map[string]string
// expIPv6Enabled says whether pinging '::1' in the container should succeed,
// and whether an IPv6 entry for the container itself is expected in '/etc/hosts'.
expIPv6Enabled bool
// expEtcHosts is the expected start of '/etc/hosts', before the entries for
// the container's own addresses.
expEtcHosts string
}{
{
// Create a container with no overrides, on the IPv6-enabled default bridge.
// Expect the container to have a working '::1' address, on the assumption
// the test host's kernel supports IPv6 - and for its '/etc/hosts' file to
// include IPv6 addresses.
name: "IPv6 enabled",
expIPv6Enabled: true,
expEtcHosts: `127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
`,
},
{
// Create a container in the same network, with IPv6 disabled. Expect '::1'
// not to be pingable, and no IPv6 addresses in its '/etc/hosts'.
name: "IPv6 disabled",
sysctls: map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"},
expIPv6Enabled: false,
expEtcHosts: "127.0.0.1\tlocalhost\n",
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
ctx := testutil.StartSpan(ctx, t)
// Run a long-lived container ('top') so that commands can be exec'd in it.
ctrId := container.Run(ctx, t, c,
container.WithName("etchosts_"+sanitizeCtrName(t.Name())),
container.WithImage("busybox:latest"),
container.WithCmd("top"),
container.WithSysctls(tc.sysctls),
)
// Force-remove the container when the subtest ends; the result of the
// removal is not checked.
defer func() {
c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
}()

// runCmd execs cmd in container ctrId with a 5 second timeout, checks that
// the exec succeeded and exited with expExitCode, and returns its stdout.
runCmd := func(ctrId string, cmd []string, expExitCode int) string {
t.Helper()
execCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
res, err := container.Exec(execCtx, c, ctrId, cmd)
assert.Check(t, is.Nil(err))
assert.Check(t, is.Equal(res.ExitCode, expExitCode))
return res.Stdout()
}

// Check that IPv6 is/isn't enabled, as expected.
// The zero value (0) means the ping is expected to succeed; exit status 1
// is expected when IPv6 is disabled.
var expPingExitStatus int
if !tc.expIPv6Enabled {
expPingExitStatus = 1
}
runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus)

// Check the contents of /etc/hosts.
stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0)
// Append the container's own addresses/name to the expected hosts file content.
// The IPv6 address entry is only expected when IPv6 is enabled in the container.
inspect := container.Inspect(ctx, t, c, ctrId)
exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n"
if tc.expIPv6Enabled {
exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n"
}
assert.Check(t, is.Equal(stdout, exp))
})
}
}
27 changes: 2 additions & 25 deletions libnetwork/drivers/bridge/port_mapping_linux.go
Expand Up @@ -6,9 +6,9 @@ import (
"errors"
"fmt"
"net"
"sync"

"github.com/containerd/log"
"github.com/docker/docker/libnetwork/netutils"
"github.com/docker/docker/libnetwork/types"
"github.com/ishidawataru/sctp"
)
Expand Down Expand Up @@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont
// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
// https://github.com/moby/moby/issues/42288
isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
if !isV6Binding && !IsV6Listenable() {
if !isV6Binding && !netutils.IsV6Listenable() {
continue
}

Expand Down Expand Up @@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error {

return portmapper.Unmap(host)
}

var (
	// v6ListenableCached holds the result of the one-off probe run by IsV6Listenable.
	v6ListenableCached bool
	// v6ListenableOnce ensures the probe is only run once.
	v6ListenableOnce sync.Once
)

// IsV6Listenable reports whether it was possible to listen on `[::1]:0`.
// The probe is made on the first call only, its result is cached and returned
// by every subsequent call.
//
// It returns false mostly when the kernel was booted with the `ipv6.disable=1`
// option.
func IsV6Listenable() bool {
	v6ListenableOnce.Do(func() {
		ln, err := net.Listen("tcp6", "[::1]:0")
		if err == nil {
			// The listener was only needed as a probe.
			ln.Close()
			v6ListenableCached = true
			return
		}
		// When the kernel was booted with `ipv6.disable=1`,
		// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
		// https://github.com/moby/moby/issues/42288
		log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err)
	})
	return v6ListenableCached
}
38 changes: 13 additions & 25 deletions libnetwork/endpoint.go
Expand Up @@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
}
}

// Do not update hosts file with internal networks endpoint IP
if !n.ingress && n.Name() != libnGWNetwork {
var addresses []string
if ip := ep.getFirstInterfaceIPv4Address(); ip != nil {
addresses = append(addresses, ip.String())
}
if ip := ep.getFirstInterfaceIPv6Address(); ip != nil {
addresses = append(addresses, ip.String())
}
if err = sb.updateHostsFile(addresses); err != nil {
return err
}
if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
return err
}
if err = sb.updateDNS(n.enableIPv6); err != nil {
return err
Expand Down Expand Up @@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
return ps, ok
}

func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP {
// Return a list of this endpoint's addresses to add to '/etc/hosts'.
func (ep *Endpoint) getEtcHostsAddrs() []string {
ep.mu.Lock()
defer ep.mu.Unlock()

if ep.iface.addr != nil {
return ep.iface.addr.IP
// Do not update hosts file with internal network's endpoint IP
if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
return nil
}

return nil
}

func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP {
ep.mu.Lock()
defer ep.mu.Unlock()

var addresses []string
if ep.iface.addr != nil {
addresses = append(addresses, ep.iface.addr.IP.String())
}
if ep.iface.addrv6 != nil {
return ep.iface.addrv6.IP
addresses = append(addresses, ep.iface.addrv6.IP.String())
}

return nil
return addresses
}

// EndpointOptionGeneric function returns an option setter for a Generic option defined
Expand Down
53 changes: 39 additions & 14 deletions libnetwork/etchosts/etchosts.go
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"fmt"
"io"
"net/netip"
"os"
"regexp"
"strings"
Expand All @@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) {

var (
// Default hosts config records slice
defaultContent = []Record{
defaultContentIPv4 = []Record{
{Hosts: "localhost", IP: "127.0.0.1"},
}
defaultContentIPv6 = []Record{
{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
{Hosts: "ip6-localnet", IP: "fe00::0"},
{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
Expand Down Expand Up @@ -71,9 +74,34 @@ func Drop(path string) {
// IP, hostname, and domainname set main record leave empty for no master record
// extraContent is an array of extra host records.
func Build(path, IP, hostname, domainname string, extraContent []Record) error {
	// Records are written in this order: the default IPv4 entries, the default
	// IPv6 entries, then the caller's extra records.
	sections := [][]Record{defaultContentIPv4, defaultContentIPv6, extraContent}
	return build(path, IP, hostname, domainname, sections...)
}

// BuildNoIPv6 is the same as Build, but will not include IPv6 entries.
//
// The default IPv6 records are left out, an IPv6 main IP is treated as if no
// IP had been given, and records in extraContent with an IPv6 address are
// dropped.
func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error {
	mainIP := IP
	if isIPv6(mainIP) {
		mainIP = ""
	}

	// Keep only the extra records that do not have an IPv6 address.
	var kept []Record
	for i := range extraContent {
		if isIPv6(extraContent[i].IP) {
			continue
		}
		kept = append(kept, extraContent[i])
	}

	return build(path, mainIP, hostname, domainname, defaultContentIPv4, kept)
}

// isIPv6 reports whether s parses as an IP address for which netip.Addr.Is6
// is true. It returns false if s cannot be parsed as an IP address.
func isIPv6(s string) bool {
	if parsed, err := netip.ParseAddr(s); err == nil {
		return parsed.Is6()
	}
	return false
}

func build(path, IP, hostname, domainname string, contents ...[]Record) error {
defer pathLock(path)()

content := bytes.NewBuffer(nil)
buf := bytes.NewBuffer(nil)
if IP != "" {
// set main record
var mainRec Record
Expand All @@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
if hostName, _, ok := strings.Cut(fqdn, "."); ok {
mainRec.Hosts += " " + hostName
}
if _, err := mainRec.WriteTo(content); err != nil {
return err
}
}
// Write defaultContent slice to buffer
for _, r := range defaultContent {
if _, err := r.WriteTo(content); err != nil {
if _, err := mainRec.WriteTo(buf); err != nil {
return err
}
}
// Write extra content from function arguments
for _, r := range extraContent {
if _, err := r.WriteTo(content); err != nil {
return err

// Write content from function arguments
for _, content := range contents {
for _, c := range content {
if _, err := c.WriteTo(buf); err != nil {
return err
}
}
}

return os.WriteFile(path, content.Bytes(), 0o644)
return os.WriteFile(path, buf.Bytes(), 0o644)
}

// Add adds an arbitrary number of Records to an already existing /etc/hosts file
Expand Down
23 changes: 23 additions & 0 deletions libnetwork/etchosts/etchosts_test.go
Expand Up @@ -4,9 +4,12 @@ import (
"bytes"
"fmt"
"os"
"path/filepath"
"testing"

"golang.org/x/sync/errgroup"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)

func TestBuildDefault(t *testing.T) {
Expand Down Expand Up @@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) {
}
}

func TestBuildNoIPv6(t *testing.T) {
d := t.TempDir()
filename := filepath.Join(d, "hosts")

err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{
{
Hosts: "another.example",
IP: "fdbb:c59c:d015::3",
},
{
Hosts: "another.example",
IP: "10.11.12.13",
},
})
assert.NilError(t, err)
content, err := os.ReadFile(filename)
assert.NilError(t, err)
assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n"))
}

func TestBuildHostnameDomainname(t *testing.T) {
file, err := os.CreateTemp("", "")
if err != nil {
Expand Down

0 comments on commit bb89202

Please sign in to comment.