Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cli): ecs hotswap deployment waits correctly for success or failure #28448

Merged
merged 14 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/@aws-cdk-testing/cli-integ/lib/aws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export class AwsClients {
public readonly cloudFormation: AwsCaller<AWS.CloudFormation>;
public readonly s3: AwsCaller<AWS.S3>;
public readonly ecr: AwsCaller<AWS.ECR>;
public readonly ecs: AwsCaller<AWS.ECS>;
public readonly sns: AwsCaller<AWS.SNS>;
public readonly iam: AwsCaller<AWS.IAM>;
public readonly lambda: AwsCaller<AWS.Lambda>;
Expand All @@ -34,6 +35,7 @@ export class AwsClients {
this.cloudFormation = makeAwsCaller(AWS.CloudFormation, this.config);
this.s3 = makeAwsCaller(AWS.S3, this.config);
this.ecr = makeAwsCaller(AWS.ECR, this.config);
this.ecs = makeAwsCaller(AWS.ECS, this.config);
this.sns = makeAwsCaller(AWS.SNS, this.config);
this.iam = makeAwsCaller(AWS.IAM, this.config);
this.lambda = makeAwsCaller(AWS.Lambda, this.config);
Expand Down
57 changes: 57 additions & 0 deletions packages/@aws-cdk-testing/cli-integ/resources/cdk-apps/app/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ var constructs = require('constructs');
if (process.env.PACKAGE_LAYOUT_VERSION === '1') {
var cdk = require('@aws-cdk/core');
var ec2 = require('@aws-cdk/aws-ec2');
var ecs = require('@aws-cdk/aws-ecs');
var s3 = require('@aws-cdk/aws-s3');
var ssm = require('@aws-cdk/aws-ssm');
var iam = require('@aws-cdk/aws-iam');
Expand All @@ -17,6 +18,7 @@ if (process.env.PACKAGE_LAYOUT_VERSION === '1') {
DefaultStackSynthesizer,
LegacyStackSynthesizer,
aws_ec2: ec2,
aws_ecs: ecs,
aws_s3: s3,
aws_ssm: ssm,
aws_iam: iam,
Expand Down Expand Up @@ -357,6 +359,60 @@ class LambdaHotswapStack extends cdk.Stack {
}
}

class EcsHotswapStack extends cdk.Stack {
constructor(parent, id, props) {
super(parent, id, props);

// define a simple vpc and cluster
const vpc = new ec2.Vpc(this, 'vpc', {
natGateways: 0,
subnetConfiguration: [
{
cidrMask: 24,
name: 'Public',
subnetType: ec2.SubnetType.PUBLIC,
},
],
maxAzs: 1,
});
const cluster = new ecs.Cluster(this, 'cluster', {
vpc,
});

// allow stack to be used to test failed deployments
const image =
process.env.USE_INVALID_ECS_HOTSWAP_IMAGE == 'true'
? 'nginx:invalidtag'
: 'nginx:alpine';

// deploy basic service
const taskDefinition = new ecs.FargateTaskDefinition(
this,
'task-definition'
);
taskDefinition.addContainer('nginx', {
image: ecs.ContainerImage.fromRegistry(image),
environment: {
SOME_VARIABLE: process.env.DYNAMIC_ECS_PROPERTY_VALUE ?? 'environment',
},
healthCheck: {
command: ['CMD-SHELL', 'exit 0'], // fake health check to speed up deployment
interval: cdk.Duration.seconds(5),
},
});
const service = new ecs.FargateService(this, 'service', {
cluster,
taskDefinition,
assignPublicIp: true, // required without NAT to pull image
circuitBreaker: { rollback: false },
desiredCount: 1,
});

new cdk.CfnOutput(this, 'ClusterName', { value: cluster.clusterName });
new cdk.CfnOutput(this, 'ServiceName', { value: service.serviceName });
}
}

class DockerStack extends cdk.Stack {
constructor(parent, id, props) {
super(parent, id, props);
Expand Down Expand Up @@ -532,6 +588,7 @@ switch (stackSet) {

new LambdaStack(app, `${stackPrefix}-lambda`);
new LambdaHotswapStack(app, `${stackPrefix}-lambda-hotswap`);
new EcsHotswapStack(app, `${stackPrefix}-ecs-hotswap`);
new DockerStack(app, `${stackPrefix}-docker`);
new DockerStackWithCustomFile(app, `${stackPrefix}-docker-with-custom-file`);
const failed = new FailedStack(app, `${stackPrefix}-failed`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1571,6 +1571,83 @@ integTest('hotswap deployment supports Fn::ImportValue intrinsic', withDefaultFi
}
}));

integTest('hotswap deployment supports ecs service', withDefaultFixture(async (fixture) => {
// GIVEN
const stackArn = await fixture.cdkDeploy('ecs-hotswap', {
captureStderr: false,
});

// WHEN
const deployOutput = await fixture.cdkDeploy('ecs-hotswap', {
options: ['--hotswap'],
captureStderr: true,
onlyStderr: true,
modEnv: {
DYNAMIC_ECS_PROPERTY_VALUE: 'new value',
},
});

const response = await fixture.aws.cloudFormation('describeStacks', {
StackName: stackArn,
});
const serviceName = response.Stacks?.[0].Outputs?.find(output => output.OutputKey == 'ServiceName')?.OutputValue;

// THEN

// The deployment should not trigger a full deployment, thus the stack's status must remains
// "CREATE_COMPLETE"
expect(response.Stacks?.[0].StackStatus).toEqual('CREATE_COMPLETE');
expect(deployOutput).toContain(`ECS Service '${serviceName}' hotswapped!`);
}));

integTest('hotswap deployment for ecs service waits for deployment to complete', withDefaultFixture(async (fixture) => {
// GIVEN
const stackArn = await fixture.cdkDeploy('ecs-hotswap', {
captureStderr: false,
});

// WHEN
await fixture.cdkDeploy('ecs-hotswap', {
options: ['--hotswap'],
modEnv: {
DYNAMIC_ECS_PROPERTY_VALUE: 'new value',
},
});

const describeStacksResponse = await fixture.aws.cloudFormation('describeStacks', {
StackName: stackArn,
});
const clusterName = describeStacksResponse.Stacks?.[0].Outputs?.find(output => output.OutputKey == 'ClusterName')?.OutputValue!;
const serviceName = describeStacksResponse.Stacks?.[0].Outputs?.find(output => output.OutputKey == 'ServiceName')?.OutputValue!;

// THEN

const describeServicesResponse = await fixture.aws.ecs('describeServices', {
cluster: clusterName,
services: [serviceName],
});
expect(describeServicesResponse.services?.[0].deployments).toHaveLength(1); // only one deployment present

}));

integTest('hotswap deployment for ecs service detects failed deployment and errors', withDefaultFixture(async (fixture) => {
// GIVEN
await fixture.cdkDeploy('ecs-hotswap');

// WHEN
const deployOutput = await fixture.cdkDeploy('ecs-hotswap', {
options: ['--hotswap'],
modEnv: {
USE_INVALID_ECS_HOTSWAP_IMAGE: 'true',
},
allowErrExit: true,
});

// THEN
expect(deployOutput).toContain(`❌ ${fixture.stackNamePrefix}-ecs-hotswap failed: ResourceNotReady: Resource is not in the state deploymentCompleted`);
expect(deployOutput).not.toContain('hotswapped!');
}));

async function listChildren(parent: string, pred: (x: string) => Promise<boolean>) {
const ret = new Array<string>();
for (const child of await fs.readdir(parent, { encoding: 'utf-8' })) {
Expand Down
24 changes: 17 additions & 7 deletions packages/aws-cdk/lib/api/hotswap/ecs-services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@ export async function isHotswappableEcsServiceChange(

// Step 3 - wait for the service deployments triggered in Step 2 to finish
// configure a custom Waiter
(sdk.ecs() as any).api.waiters.deploymentToFinish = {
name: 'DeploymentToFinish',
(sdk.ecs() as any).api.waiters.deploymentCompleted = {
name: 'DeploymentCompleted',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not well versed in this arena, whats the difference between deploymentToFinish and deploymentCompleted?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's purely just naming :) I describe this more in my original PR that was closed (here) but because we can now detect that the deployment failed I noticed the error message based on this naming wasn't clear what had occured

Before:

❌  cdktest-0l5zgdtshw5-ecs-hotswap failed: ResourceNotReady: Resource is not in the state deploymentToFinish

After:

❌  cdktest-0l5zgdtshw5-ecs-hotswap failed: ResourceNotReady: Resource is not in the state deploymentCompleted

operation: 'describeServices',
delay: 10,
maxAttempts: 60,
delay: 6,
maxAttempts: 100,
Comment on lines +125 to +126
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you briefly explain the reason for changing these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original max delay was 10 * 60 = 600 and the new max delay is 6 * 100 = 600 so the overall max delay is the same

I've shortened the delay here to make the deployment more responsive -- people are using hotswap because it's meant to be faster and the value of delay here represents the "extra" overhead time of the polling that users might experience. I figured tweaking if from 10 to 6 would be a good balance between improving responsiveness and respecting API limits and not polling too often

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I just reminded myself from my commit message for this change as well: the FAILED intermediate state for the deployment in the ECS API exists for a reasonably short time. So this change also ensures that we catch that intermediate FAILED state

acceptors: [
{
matcher: 'pathAny',
Expand All @@ -143,16 +143,26 @@ export async function isHotswappableEcsServiceChange(
expected: 'INACTIVE',
state: 'failure',
},

// failure if any services report a deployment with status FAILED
{
matcher: 'path',
argument: "length(services[].deployments[? rolloutState == 'FAILED'][]) > `0`",
expected: true,
state: 'failure',
},

// wait for all services to report only a single deployment
{
matcher: 'path',
argument: "length(services[].deployments[? status == 'PRIMARY' && runningCount < desiredCount][]) == `0`",
argument: 'length(services[? length(deployments) > `1`]) == `0`',
expected: true,
state: 'success',
},
],
};
// create a custom Waiter that uses the deploymentToFinish configuration added above
const deploymentWaiter = new (AWS as any).ResourceWaiter(sdk.ecs(), 'deploymentToFinish');
// create a custom Waiter that uses the deploymentCompleted configuration added above
const deploymentWaiter = new (AWS as any).ResourceWaiter(sdk.ecs(), 'deploymentCompleted');
// wait for all of the waiters to finish
await Promise.all(Object.entries(servicePerClusterUpdates).map(([clusterName, serviceUpdates]) => {
return deploymentWaiter.wait({
Expand Down