diff --git a/daemon/restart.go b/daemon/restart.go index 38828ef8eaa09..d56ce080fc24a 100644 --- a/daemon/restart.go +++ b/daemon/restart.go @@ -6,6 +6,7 @@ import ( containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/container" + "github.com/docker/docker/internal/compatcontext" ) // ContainerRestart stops and starts a container. It attempts to @@ -31,6 +32,10 @@ func (daemon *Daemon) ContainerRestart(ctx context.Context, name string, options // gracefully stop, before forcefully terminating the container. If // given a negative duration, wait forever for a graceful stop. func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error { + // Restarting is expected to be an atomic operation, and cancelling + // the request should not cancel the stop -> start sequence. + ctx = compatcontext.WithoutCancel(ctx) + // Determine isolation. If not specified in the hostconfig, use daemon default. actualIsolation := container.HostConfig.Isolation if containertypes.Isolation.IsDefault(actualIsolation) { diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go index 3a6155003a59d..bceb605c34582 100644 --- a/integration/container/restart_test.go +++ b/integration/container/restart_test.go @@ -3,15 +3,18 @@ package container // import "github.com/docker/docker/integration/container" import ( "context" "fmt" + "runtime" "testing" "time" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" testContainer "github.com/docker/docker/integration/internal/container" "github.com/docker/docker/testutil/daemon" "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" "gotest.tools/v3/poll" "gotest.tools/v3/skip" ) @@ -208,3 +211,72 @@ func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) { }) } } + +// TestContainerRestartWithCancelledRequest verifies that cancelling a restart +// request does not cancel the restart operation, and still starts the container +// after it was stopped. +// +// Regression test for https://github.com/moby/moby/discussions/46682 +func TestContainerRestartWithCancelledRequest(t *testing.T) { + ctx := context.TODO() + defer setupTest(t) + apiClient := testEnv.APIClient() + + // Create a container that ignores SIGTERM and doesn't stop immediately, + // giving us time to cancel the request. + // + // Restarting a container is "stop" (and, if needed, "kill"), then "start" + // the container. We're trying to create the scenario where the "stop" is + // handled, but the request was cancelled and therefore the "start" not + // taking place. + cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done")) + defer func() { + err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true}) + if t.Failed() && err != nil { + t.Logf("Cleaning up test container failed with error: %v", err) + } + }() + + // Start listening for events. + messages, errs := apiClient.Events(ctx, types.EventsOptions{ + Filters: filters.NewArgs( + filters.Arg("container", cID), + filters.Arg("event", "restart"), + ), + }) + + // Make restart request, but cancel the request before the container + // is (forcibly) killed. + ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond) + stopTimeout := 1 + err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{ + Timeout: &stopTimeout, + }) + assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded)) + cancel() + + // Validate that the restart event occurred, which is emitted + // after the restart (stop (kill) start) finished. + // + // Note that we cannot use RestartCount for this, as that's only + // used for restart-policies. + restartTimeout := 2 * time.Second + if runtime.GOOS == "windows" { + // hcs can sometimes take a long time to stop container. + restartTimeout = StopContainerWindowsPollTimeout + } + select { + case m := <-messages: + assert.Check(t, is.Equal(m.Actor.ID, cID)) + assert.Check(t, is.Equal(m.Action, "restart")) + case err := <-errs: + assert.NilError(t, err) + case <-time.After(restartTimeout): + t.Errorf("timeout waiting for restart event") + } + + // Container should be restarted (running). + inspect, err := apiClient.ContainerInspect(ctx, cID) + assert.NilError(t, err) + assert.Check(t, is.Equal(inspect.State.Status, "running")) +}