Skip to content

Commit

Permalink
daemon: daemon.containerRestart: don't cancel restart on context cancel
Browse files Browse the repository at this point in the history
commit def549c passed through the context
to the daemon.ContainerStart function. As a result, restarting containers
no longer is an atomic operation, because a context cancellation could
interrupt the restart (between "stopping" and "(re)starting"), resulting
in the container being stopped, but not restarted.

Restarting a container, or more factually; making a successful request on
the `/containers/{id]/restart` endpoint, should be an atomic operation.

This patch uses a context.WithoutCancel for restart requests.

It's worth noting that daemon.containerStop already uses context.WithoutCancel,
so in that function, we'll be wrapping the context twice, but this should
likely not cause issues (just redundant for this code-path).

Before this patch, starting a container that bind-mounts the docker socket,
then restarting itself from within the container would cancel the restart
operation. The container would be stopped, but not started after that:

    docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh
    docker exec myself sh -c 'docker restart myself'

    docker ps -a
    CONTAINER ID   IMAGE         COMMAND                  CREATED          STATUS                       PORTS     NAMES
    3a2a741c65ff   docker:cli    "docker-entrypoint.s…"   26 seconds ago   Exited (128) 7 seconds ago             myself

With this patch: the stop still cancels the exec, but does not cancel the
restart operation, and the container is started again:

    docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh
    docker exec myself sh -c 'docker restart myself'
    docker ps
    CONTAINER ID   IMAGE        COMMAND                  CREATED              STATUS         PORTS     NAMES
    4393a01f7c75   docker:cli   "docker-entrypoint.s…"   About a minute ago   Up 4 seconds             myself

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
(cherry picked from commit aeb8972)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
  • Loading branch information
thaJeztah committed Oct 24, 2023
1 parent 9b20b1a commit 3c91c0a
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
5 changes: 5 additions & 0 deletions daemon/restart.go
Expand Up @@ -6,6 +6,7 @@ import (

containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/container"
"github.com/docker/docker/internal/compatcontext"
)

// ContainerRestart stops and starts a container. It attempts to
Expand All @@ -31,6 +32,10 @@ func (daemon *Daemon) ContainerRestart(ctx context.Context, name string, options
// gracefully stop, before forcefully terminating the container. If
// given a negative duration, wait forever for a graceful stop.
func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error {
// Restarting is expected to be an atomic operation, and cancelling
// the request should not cancel the stop -> start sequence.
ctx = compatcontext.WithoutCancel(ctx)

// Determine isolation. If not specified in the hostconfig, use daemon default.
actualIsolation := container.HostConfig.Isolation
if containertypes.Isolation.IsDefault(actualIsolation) {
Expand Down
72 changes: 72 additions & 0 deletions integration/container/restart_test.go
Expand Up @@ -3,15 +3,18 @@ package container // import "github.com/docker/docker/integration/container"
import (
"context"
"fmt"
"runtime"
"testing"
"time"

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
testContainer "github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/poll"
"gotest.tools/v3/skip"
)
Expand Down Expand Up @@ -208,3 +211,72 @@ func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
})
}
}

// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
// request does not cancel the restart operation, and still starts the container
// after it was stopped.
//
// Regression test for https://github.com/moby/moby/discussions/46682
func TestContainerRestartWithCancelledRequest(t *testing.T) {
ctx := context.TODO()
defer setupTest(t)
apiClient := testEnv.APIClient()

// Create a container that ignores SIGTERM and doesn't stop immediately,
// giving us time to cancel the request.
//
// Restarting a container is "stop" (and, if needed, "kill"), then "start"
// the container. We're trying to create the scenario where the "stop" is
// handled, but the request was cancelled and therefore the "start" not
// taking place.
cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
defer func() {
err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true})
if t.Failed() && err != nil {
t.Logf("Cleaning up test container failed with error: %v", err)
}
}()

// Start listening for events.
messages, errs := apiClient.Events(ctx, types.EventsOptions{
Filters: filters.NewArgs(
filters.Arg("container", cID),
filters.Arg("event", "restart"),
),
})

// Make restart request, but cancel the request before the container
// is (forcibly) killed.
ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
stopTimeout := 1
err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
Timeout: &stopTimeout,
})
assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
cancel()

// Validate that the restart event occurred, which is emitted
// after the restart (stop (kill) start) finished.
//
// Note that we cannot use RestartCount for this, as that's only
// used for restart-policies.
restartTimeout := 2 * time.Second
if runtime.GOOS == "windows" {
// hcs can sometimes take a long time to stop container.
restartTimeout = StopContainerWindowsPollTimeout
}
select {
case m := <-messages:
assert.Check(t, is.Equal(m.Actor.ID, cID))
assert.Check(t, is.Equal(m.Action, "restart"))
case err := <-errs:
assert.NilError(t, err)
case <-time.After(restartTimeout):
t.Errorf("timeout waiting for restart event")
}

// Container should be restarted (running).
inspect, err := apiClient.ContainerInspect(ctx, cID)
assert.NilError(t, err)
assert.Check(t, is.Equal(inspect.State.Status, "running"))
}

0 comments on commit 3c91c0a

Please sign in to comment.