Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[25.0 backport] De-flake TestSwarmClusterRotateUnlockKey... again... maybe? #47201

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions integration-cli/check_test.go
Expand Up @@ -657,6 +657,9 @@ func (s *DockerSwarmSuite) TearDownTest(ctx context.Context, c *testing.T) {
s.daemonsLock.Lock()
for _, d := range s.daemons {
if d != nil {
if c.Failed() {
d.TailLogsT(c, 100)
}
d.Stop(c)
d.Cleanup(c)
}
Expand Down
107 changes: 54 additions & 53 deletions integration-cli/docker_cli_swarm_test.go
Expand Up @@ -27,6 +27,7 @@ import (
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
"github.com/docker/docker/pkg/plugins"
"github.com/docker/docker/testutil"
testdaemon "github.com/docker/docker/testutil/daemon"
"github.com/moby/swarmkit/v2/ca/keyutils"
"github.com/vishvananda/netlink"
"gotest.tools/v3/assert"
Expand Down Expand Up @@ -1260,6 +1261,8 @@ func (s *DockerSwarmSuite) TestSwarmJoinPromoteLocked(c *testing.T) {
poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second))
}

const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"

func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
ctx := testutil.GetContext(c)
d := s.AddDaemon(ctx, c, true, true)
Expand All @@ -1280,12 +1283,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
d.RestartNode(c)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)

unlock := func(d *daemon.Daemon, key string) *icmd.Result {
cmd := d.Command("swarm", "unlock")
cmd.Stdin = strings.NewReader(key)
return icmd.RunCmd(cmd)
}

outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd := d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result := icmd.RunCmd(cmd)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)

result := unlock(d, unlockKey)
if result.Error == nil {
// On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is
Expand All @@ -1295,42 +1302,37 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this
// time.

time.Sleep(3 * time.Second)
// Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)

d.RestartNode(c)

cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
result = unlock(d, unlockKey)
}
result.Assert(c, icmd.Expected{
ExitCode: 1,
Err: "invalid key",
})

outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
unlock(d, newUnlockKey).Assert(c, icmd.Success)

assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)

retry := 0
for {
checkNodeLs := func(t poll.LogT) poll.Result {
// an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls")
if err != nil && retry < 5 {
if strings.Contains(outs, "swarm does not have a leader") {
retry++
time.Sleep(3 * time.Second)
continue
}
out, err := d.Cmd("node", "ls")
if err != nil {
return poll.Continue("error running node ls: %v: %s", err, out)
}
assert.NilError(c, err)
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
return poll.Success()
}
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))

unlockKey = newUnlockKey
}
Expand Down Expand Up @@ -1368,15 +1370,23 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
d2.RestartNode(c)
d3.RestartNode(c)

unlock := func(d *daemon.Daemon, key string) *icmd.Result {
cmd := d.Command("swarm", "unlock")
cmd.Stdin = strings.NewReader(key)
return icmd.RunCmd(cmd)
}

const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"

for _, d := range []*daemon.Daemon{d2, d3} {
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)

outs, _ := d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd := d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result := icmd.RunCmd(cmd)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)

// unlock with the original key should fail
// Use poll here because the daemon may not have finished
result := unlock(d, unlockKey)
if result.Error == nil {
// On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is
Expand All @@ -1386,45 +1396,36 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this
// time.

time.Sleep(3 * time.Second)

d.RestartNode(c)
// Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)

cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
result = unlock(d, unlockKey)
}
result.Assert(c, icmd.Expected{
ExitCode: 1,
Err: "invalid key",
})

outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)

// now unlock with the rotated key, this should succeed
unlock(d, newUnlockKey).Assert(c, icmd.Success)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)

retry := 0
for {
checkNodeLs := func(t poll.LogT) poll.Result {
// an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls")
if err != nil && retry < 5 {
if strings.Contains(outs, "swarm does not have a leader") {
retry++
c.Logf("[%s] got 'swarm does not have a leader'. retrying (attempt %d/5)", d.ID(), retry)
time.Sleep(3 * time.Second)
continue
} else {
c.Logf("[%s] gave error: '%v'. retrying (attempt %d/5): %s", d.ID(), err, retry, outs)
}
out, err := d.Cmd("node", "ls")
if err != nil {
return poll.Continue("error running node ls: %v: %s", err, out)
}
assert.NilError(c, err, "[%s] failed after %d retries: %v (%s)", d.ID(), retry, err, outs)
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
return poll.Success()
}
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
}

unlockKey = newUnlockKey
Expand Down
11 changes: 11 additions & 0 deletions testutil/daemon/daemon.go
Expand Up @@ -338,6 +338,17 @@ func ScanLogsMatchString(contains string) func(string) bool {
}
}

// ScanLogsMatchCount returns a function that can be used to scan the daemon logs until the passed in matcher function matches `count` times
func ScanLogsMatchCount(f func(string) bool, count int) func(string) bool {
matched := 0
return func(line string) bool {
if f(line) {
matched++
}
return matched == count
}
}

// ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched
func ScanLogsMatchAll(contains ...string) func(string) bool {
matched := make(map[string]bool)
Expand Down