Skip to content

Commit f3cf7c3

Browse files
authored Sep 12, 2024
xds: Add xDS node ID in few control plane errors (#11519)
1 parent 15cd2f9 commit f3cf7c3

File tree

4 files changed

+59
-22
lines changed

4 files changed

+59
-22
lines changed
 

Diff for: ‎xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java

+11-7
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,9 @@ private void handleClusterDiscovered() {
206206
}
207207
loopStatus = Status.UNAVAILABLE.withDescription(String.format(
208208
"CDS error: circular aggregate clusters directly under %s for "
209-
+ "root cluster %s, named %s",
210-
clusterState.name, root.name, namesCausingLoops));
209+
+ "root cluster %s, named %s, xDS node ID: %s",
210+
clusterState.name, root.name, namesCausingLoops,
211+
xdsClient.getBootstrapInfo().node().getId()));
211212
}
212213
}
213214
}
@@ -224,9 +225,9 @@ private void handleClusterDiscovered() {
224225
childLb.shutdown();
225226
childLb = null;
226227
}
227-
Status unavailable =
228-
Status.UNAVAILABLE.withDescription("CDS error: found 0 leaf (logical DNS or EDS) "
229-
+ "clusters for root cluster " + root.name);
228+
Status unavailable = Status.UNAVAILABLE.withDescription(String.format(
229+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster %s"
230+
+ " xDS node ID: %s", root.name, xdsClient.getBootstrapInfo().node().getId()));
230231
helper.updateBalancingState(
231232
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(unavailable)));
232233
return;
@@ -288,11 +289,14 @@ private void addAncestors(Set<String> ancestors, ClusterState clusterState,
288289
}
289290

290291
private void handleClusterDiscoveryError(Status error) {
292+
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
293+
Status errorWithNodeId = error.withDescription(
294+
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
291295
if (childLb != null) {
292-
childLb.handleNameResolutionError(error);
296+
childLb.handleNameResolutionError(errorWithNodeId);
293297
} else {
294298
helper.updateBalancingState(
295-
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(error)));
299+
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(errorWithNodeId)));
296300
}
297301
}
298302

Diff for: ‎xds/src/main/java/io/grpc/xds/XdsNameResolver.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -815,10 +815,12 @@ private void cleanUpRoutes(String error) {
815815
// the config selector handles the error message itself. Once the LB API allows providing
816816
// failure information for addresses yet still providing a service config, the config selector
817817
// could be avoided.
818+
String errorWithNodeId =
819+
error + ", xDS node ID: " + xdsClient.getBootstrapInfo().node().getId();
818820
listener.onResult(ResolutionResult.newBuilder()
819821
.setAttributes(Attributes.newBuilder()
820822
.set(InternalConfigSelector.KEY,
821-
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(error)))
823+
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(errorWithNodeId)))
822824
.build())
823825
.setServiceConfig(emptyServiceConfig)
824826
.build());

Diff for: ‎xds/src/main/java/io/grpc/xds/XdsServerWrapper.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,8 @@ public void onResourceDoesNotExist(final String resourceName) {
425425
return;
426426
}
427427
StatusException statusException = Status.UNAVAILABLE.withDescription(
428-
"Listener " + resourceName + " unavailable").asException();
428+
String.format("Listener %s unavailable, xDS node ID: %s", resourceName,
429+
xdsClient.getBootstrapInfo().node().getId())).asException();
429430
handleConfigNotFound(statusException);
430431
}
431432

@@ -434,9 +435,12 @@ public void onError(final Status error) {
434435
if (stopped) {
435436
return;
436437
}
437-
logger.log(Level.FINE, "Error from XdsClient", error);
438+
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
439+
Status errorWithNodeId = error.withDescription(
440+
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
441+
logger.log(Level.FINE, "Error from XdsClient", errorWithNodeId);
438442
if (!isServing) {
439-
listener.onNotServing(error.asException());
443+
listener.onNotServing(errorWithNodeId.asException());
440444
}
441445
}
442446

@@ -664,8 +668,11 @@ public void run() {
664668
if (!routeDiscoveryStates.containsKey(resourceName)) {
665669
return;
666670
}
671+
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
672+
Status errorWithNodeId = error.withDescription(
673+
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
667674
logger.log(Level.WARNING, "Error loading RDS resource {0} from XdsClient: {1}.",
668-
new Object[]{resourceName, error});
675+
new Object[]{resourceName, errorWithNodeId});
669676
maybeUpdateSelector();
670677
}
671678
});

Diff for: ‎xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java

+34-10
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@
5858
import io.grpc.xds.LeastRequestLoadBalancer.LeastRequestConfig;
5959
import io.grpc.xds.RingHashLoadBalancer.RingHashConfig;
6060
import io.grpc.xds.XdsClusterResource.CdsUpdate;
61+
import io.grpc.xds.client.Bootstrapper.BootstrapInfo;
6162
import io.grpc.xds.client.Bootstrapper.ServerInfo;
63+
import io.grpc.xds.client.EnvoyProtoData;
6264
import io.grpc.xds.client.XdsClient;
6365
import io.grpc.xds.client.XdsResourceType;
6466
import io.grpc.xds.internal.security.CommonTlsContextTestsUtil;
@@ -94,6 +96,16 @@ public class CdsLoadBalancer2Test {
9496
private static final String DNS_HOST_NAME = "backend-service-dns.googleapis.com:443";
9597
private static final ServerInfo LRS_SERVER_INFO =
9698
ServerInfo.create("lrs.googleapis.com", InsecureChannelCredentials.create());
99+
private static final String SERVER_URI = "trafficdirector.googleapis.com";
100+
private static final String NODE_ID =
101+
"projects/42/networks/default/nodes/5c85b298-6f5b-4722-b74a-f7d1f0ccf5ad";
102+
private static final EnvoyProtoData.Node BOOTSTRAP_NODE =
103+
EnvoyProtoData.Node.newBuilder().setId(NODE_ID).build();
104+
private static final BootstrapInfo BOOTSTRAP_INFO = BootstrapInfo.builder()
105+
.servers(ImmutableList.of(
106+
ServerInfo.create(SERVER_URI, InsecureChannelCredentials.create())))
107+
.node(BOOTSTRAP_NODE)
108+
.build();
97109
private final UpstreamTlsContext upstreamTlsContext =
98110
CommonTlsContextTestsUtil.buildUpstreamTlsContext("google_cloud_private_spiffe", true);
99111
private final OutlierDetection outlierDetection = OutlierDetection.create(
@@ -211,7 +223,8 @@ public void nonAggregateCluster_resourceNotExist_returnErrorPicker() {
211223
verify(helper).updateBalancingState(
212224
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
213225
Status unavailable = Status.UNAVAILABLE.withDescription(
214-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
226+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
227+
+ " xDS node ID: " + NODE_ID);
215228
assertPicker(pickerCaptor.getValue(), unavailable, null);
216229
assertThat(childBalancers).isEmpty();
217230
}
@@ -254,7 +267,8 @@ public void nonAggregateCluster_resourceRevoked() {
254267
xdsClient.deliverResourceNotExist(CLUSTER);
255268
assertThat(childBalancer.shutdown).isTrue();
256269
Status unavailable = Status.UNAVAILABLE.withDescription(
257-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
270+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
271+
+ " xDS node ID: " + NODE_ID);
258272
verify(helper).updateBalancingState(
259273
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
260274
assertPicker(pickerCaptor.getValue(), unavailable, null);
@@ -331,7 +345,8 @@ public void aggregateCluster_noNonAggregateClusterExits_returnErrorPicker() {
331345
verify(helper).updateBalancingState(
332346
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
333347
Status unavailable = Status.UNAVAILABLE.withDescription(
334-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
348+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
349+
+ " xDS node ID: " + NODE_ID);
335350
assertPicker(pickerCaptor.getValue(), unavailable, null);
336351
assertThat(childBalancers).isEmpty();
337352
}
@@ -379,7 +394,8 @@ public void aggregateCluster_descendantClustersRevoked() {
379394
verify(helper).updateBalancingState(
380395
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
381396
Status unavailable = Status.UNAVAILABLE.withDescription(
382-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
397+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
398+
+ " xDS node ID: " + NODE_ID);
383399
assertPicker(pickerCaptor.getValue(), unavailable, null);
384400
assertThat(childBalancer.shutdown).isTrue();
385401
assertThat(childBalancers).isEmpty();
@@ -418,7 +434,8 @@ public void aggregateCluster_rootClusterRevoked() {
418434
verify(helper).updateBalancingState(
419435
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
420436
Status unavailable = Status.UNAVAILABLE.withDescription(
421-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
437+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
438+
+ " xDS node ID: " + NODE_ID);
422439
assertPicker(pickerCaptor.getValue(), unavailable, null);
423440
assertThat(childBalancer.shutdown).isTrue();
424441
assertThat(childBalancers).isEmpty();
@@ -466,7 +483,8 @@ public void aggregateCluster_intermediateClusterChanges() {
466483
verify(helper).updateBalancingState(
467484
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
468485
Status unavailable = Status.UNAVAILABLE.withDescription(
469-
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
486+
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
487+
+ " xDS node ID: " + NODE_ID);
470488
assertPicker(pickerCaptor.getValue(), unavailable, null);
471489
assertThat(childBalancer.shutdown).isTrue();
472490
assertThat(childBalancers).isEmpty();
@@ -507,7 +525,7 @@ public void aggregateCluster_withLoops() {
507525
Status unavailable = Status.UNAVAILABLE.withDescription(
508526
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
509527
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
510-
+ " cluster-02.googleapis.com]");
528+
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
511529
assertPicker(pickerCaptor.getValue(), unavailable, null);
512530
}
513531

@@ -549,7 +567,7 @@ public void aggregateCluster_withLoops_afterEds() {
549567
Status unavailable = Status.UNAVAILABLE.withDescription(
550568
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
551569
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
552-
+ " cluster-02.googleapis.com]");
570+
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
553571
assertPicker(pickerCaptor.getValue(), unavailable, null);
554572
}
555573

@@ -617,7 +635,7 @@ public void aggregateCluster_discoveryErrorBeforeChildLbCreated_returnErrorPicke
617635
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
618636
Status expectedError = Status.UNAVAILABLE.withDescription(
619637
"Unable to load CDS cluster-foo.googleapis.com. xDS server returned: "
620-
+ "RESOURCE_EXHAUSTED: OOM");
638+
+ "RESOURCE_EXHAUSTED: OOM xDS node ID: " + NODE_ID);
621639
assertPicker(pickerCaptor.getValue(), expectedError, null);
622640
assertThat(childBalancers).isEmpty();
623641
}
@@ -647,7 +665,8 @@ public void aggregateCluster_discoveryErrorAfterChildLbCreated_propagateToChildL
647665

648666
@Test
649667
public void handleNameResolutionErrorFromUpstream_beforeChildLbCreated_returnErrorPicker() {
650-
Status upstreamError = Status.UNAVAILABLE.withDescription("unreachable");
668+
Status upstreamError = Status.UNAVAILABLE.withDescription(
669+
"unreachable xDS node ID: " + NODE_ID);
651670
loadBalancer.handleNameResolutionError(upstreamError);
652671
verify(helper).updateBalancingState(
653672
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
@@ -821,6 +840,11 @@ public <T extends ResourceUpdate> void cancelXdsResourceWatch(XdsResourceType<T>
821840
}
822841
}
823842

843+
@Override
844+
public BootstrapInfo getBootstrapInfo() {
845+
return BOOTSTRAP_INFO;
846+
}
847+
824848
private void deliverCdsUpdate(String clusterName, CdsUpdate update) {
825849
if (watchers.containsKey(clusterName)) {
826850
List<ResourceWatcher<CdsUpdate>> resourceWatchers =

0 commit comments

Comments
 (0)
Please sign in to comment.