From f0b5ca47fbc2212ef1bdedcb29a4238bab447141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Tue, 28 Nov 2023 10:21:25 +0100 Subject: [PATCH 1/2] liverestore: Don't remove `--rm` containers on restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When live-restore is enabled, containers with autoremove enabled shouldn't be forcibly killed when engine restarts. They still should be removed if they exited while the engine was down though. Signed-off-by: Paweł Gronowski (cherry picked from commit c5ea3d595c0321a3fbd8e5470525804b3015a5e3) Signed-off-by: Paweł Gronowski --- daemon/daemon.go | 9 +++-- integration/daemon/daemon_test.go | 61 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index 4d76c57988884..2f6e7254a0752 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -472,9 +472,12 @@ func (daemon *Daemon) restore() error { restartContainers[c] = make(chan struct{}) mapLock.Unlock() } else if c.HostConfig != nil && c.HostConfig.AutoRemove { - mapLock.Lock() - removeContainers[c.ID] = c - mapLock.Unlock() + // Remove the container if live-restore is disabled or if the container has already exited. + if !daemon.configStore.LiveRestoreEnabled || !alive { + mapLock.Lock() + removeContainers[c.ID] = c + mapLock.Unlock() + } } c.Lock() diff --git a/integration/daemon/daemon_test.go b/integration/daemon/daemon_test.go index 5ce70d8418e56..adbfc733c3e73 100644 --- a/integration/daemon/daemon_test.go +++ b/integration/daemon/daemon_test.go @@ -14,6 +14,7 @@ import ( "strings" "syscall" "testing" + "time" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/mount" @@ -370,6 +371,66 @@ func TestLiveRestore(t *testing.T) { skip.If(t, runtime.GOOS == "windows", "cannot start multiple daemons on windows") t.Run("volume references", testLiveRestoreVolumeReferences) + t.Run("autoremove", testLiveRestoreAutoRemove) +} + +func testLiveRestoreAutoRemove(t *testing.T) { + skip.If(t, testEnv.IsRootless(), "restarted rootless daemon will have a new process namespace") + + t.Parallel() + ctx := context.Background() + + run := func(t *testing.T) (*daemon.Daemon, func(), string) { + d := daemon.New(t) + d.StartWithBusybox(t, "--live-restore", "--iptables=false") + t.Cleanup(func() { + d.Stop(t) + d.Cleanup(t) + }) + + tmpDir := t.TempDir() + + apiClient := d.NewClientT(t) + + cID := container.Run(ctx, t, apiClient, + container.WithBind(tmpDir, "/v"), + // Run until a 'stop' file is created. + container.WithCmd("sh", "-c", "while [ ! -f /v/stop ]; do sleep 0.1; done"), + container.WithAutoRemove) + t.Cleanup(func() { apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true}) }) + finishContainer := func() { + file, err := os.Create(filepath.Join(tmpDir, "stop")) + assert.NilError(t, err, "Failed to create 'stop' file") + file.Close() + } + return d, finishContainer, cID + } + + t.Run("engine restart shouldnt kill alive containers", func(t *testing.T) { + d, finishContainer, cID := run(t) + + d.Restart(t, "--live-restore", "--iptables=false") + + apiClient := d.NewClientT(t) + _, err := apiClient.ContainerInspect(ctx, cID) + assert.NilError(t, err, "Container shouldn't be removed after engine restart") + + finishContainer() + + poll.WaitOn(t, container.IsRemoved(ctx, apiClient, cID)) + }) + t.Run("engine restart should remove containers that exited", func(t *testing.T) { + d, finishContainer, cID := run(t) + + d.Stop(t) + + finishContainer() + time.Sleep(time.Millisecond * 200) + + d.Start(t, "--live-restore", "--iptables=false") + + poll.WaitOn(t, container.IsRemoved(ctx, d.NewClientT(t), cID)) + }) } func testLiveRestoreVolumeReferences(t *testing.T) { From d0b5a5a8a51bc1b885fd3566ae05d1f87faae9b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Wed, 29 Nov 2023 15:37:04 +0100 Subject: [PATCH 2/2] integration/TestLiveRestore: Wait for process to exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace `time.Sleep` with a poll that checks if process no longer exists to avoid possible race condition. Signed-off-by: Paweł Gronowski (cherry picked from commit 3a0af5ad30ba9d4d1c16c914fb4e60d7d46468ee) Signed-off-by: Paweł Gronowski --- integration/daemon/daemon_test.go | 13 ++++++++++--- integration/internal/process/wait.go | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 integration/internal/process/wait.go diff --git a/integration/daemon/daemon_test.go b/integration/daemon/daemon_test.go index adbfc733c3e73..61c06c07150df 100644 --- a/integration/daemon/daemon_test.go +++ b/integration/daemon/daemon_test.go @@ -14,7 +14,6 @@ import ( "strings" "syscall" "testing" - "time" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/mount" @@ -22,6 +21,7 @@ import ( "github.com/docker/docker/daemon/config" "github.com/docker/docker/errdefs" "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/integration/internal/process" "github.com/docker/docker/pkg/stdcopy" "github.com/docker/docker/testutil/daemon" "gotest.tools/v3/assert" @@ -422,14 +422,21 @@ func testLiveRestoreAutoRemove(t *testing.T) { t.Run("engine restart should remove containers that exited", func(t *testing.T) { d, finishContainer, cID := run(t) + apiClient := d.NewClientT(t) + + // Get PID of the container process. + inspect, err := apiClient.ContainerInspect(ctx, cID) + assert.NilError(t, err) + pid := inspect.State.Pid + d.Stop(t) finishContainer() - time.Sleep(time.Millisecond * 200) + poll.WaitOn(t, process.NotAlive(pid)) d.Start(t, "--live-restore", "--iptables=false") - poll.WaitOn(t, container.IsRemoved(ctx, d.NewClientT(t), cID)) + poll.WaitOn(t, container.IsRemoved(ctx, apiClient, cID)) }) } diff --git a/integration/internal/process/wait.go b/integration/internal/process/wait.go new file mode 100644 index 0000000000000..1190fa76b058b --- /dev/null +++ b/integration/internal/process/wait.go @@ -0,0 +1,17 @@ +package process + +import ( + procpkg "github.com/docker/docker/pkg/process" + "gotest.tools/v3/poll" +) + +// NotAlive verifies the process doesn't exist (finished or never started). +func NotAlive(pid int) func(log poll.LogT) poll.Result { + return func(log poll.LogT) poll.Result { + if !procpkg.Alive(pid) { + return poll.Success() + } + + return poll.Continue("waiting for process to finish") + } +}