Skip to content

Commit 7f9c1f3

Browse files
authoredNov 27, 2024··
rls: Reduce RLS channel logging
The channel log is shared by many components and is poorly suited to the noise of per-RPC events. This commit restricts RLS usage of the logger to no more frequent than cache entry events. This may still be too frequent, but should substantially improve the signal-to-noise and we can do further rework as needed. Many of the log entries were poor because they lacked enough context. They weren't even clear they were from RLS. The cache entry events now regularly include the request key in the logs, allowing you to follow events for specific keys. I would have preferred using the hash code, but NumberFormat is annoying and toString() may be acceptable given its convenience. This commit reverts much of eba699a. Those logs have not proven to be helpful as they produce more output than can be reasonably stored.
1 parent ebb43a6 commit 7f9c1f3

File tree

2 files changed

+19
-50
lines changed

2 files changed

+19
-50
lines changed
 

‎rls/src/main/java/io/grpc/rls/CachingRlsLbClient.java

+18-41
Original file line numberDiff line numberDiff line change
@@ -278,37 +278,38 @@ private void periodicClean() {
278278
@GuardedBy("lock")
279279
private CachedRouteLookupResponse asyncRlsCall(
280280
RouteLookupRequest request, @Nullable BackoffPolicy backoffPolicy) {
281-
logger.log(ChannelLogLevel.DEBUG, "Making an async call to RLS");
282281
if (throttler.shouldThrottle()) {
283-
logger.log(ChannelLogLevel.DEBUG, "Request is throttled");
282+
logger.log(ChannelLogLevel.DEBUG, "[RLS Entry {0}] Throttled RouteLookup", request);
284283
// Cache updated, but no need to call updateBalancingState because no RPCs were queued waiting
285284
// on this result
286285
return CachedRouteLookupResponse.backoffEntry(createBackOffEntry(
287286
request, Status.RESOURCE_EXHAUSTED.withDescription("RLS throttled"), backoffPolicy));
288287
}
289288
final SettableFuture<RouteLookupResponse> response = SettableFuture.create();
290289
io.grpc.lookup.v1.RouteLookupRequest routeLookupRequest = REQUEST_CONVERTER.convert(request);
291-
logger.log(ChannelLogLevel.DEBUG, "Sending RouteLookupRequest: {0}", routeLookupRequest);
290+
logger.log(ChannelLogLevel.DEBUG,
291+
"[RLS Entry {0}] Starting RouteLookup: {1}", request, routeLookupRequest);
292292
rlsStub.withDeadlineAfter(callTimeoutNanos, TimeUnit.NANOSECONDS)
293293
.routeLookup(
294294
routeLookupRequest,
295295
new StreamObserver<io.grpc.lookup.v1.RouteLookupResponse>() {
296296
@Override
297297
public void onNext(io.grpc.lookup.v1.RouteLookupResponse value) {
298-
logger.log(ChannelLogLevel.DEBUG, "Received RouteLookupResponse: {0}", value);
298+
logger.log(ChannelLogLevel.DEBUG,
299+
"[RLS Entry {0}] RouteLookup succeeded: {1}", request, value);
299300
response.set(RESPONSE_CONVERTER.reverse().convert(value));
300301
}
301302

302303
@Override
303304
public void onError(Throwable t) {
304-
logger.log(ChannelLogLevel.DEBUG, "Error looking up route:", t);
305+
logger.log(ChannelLogLevel.DEBUG,
306+
"[RLS Entry {0}] RouteLookup failed: {1}", request, t);
305307
response.setException(t);
306308
throttler.registerBackendResponse(true);
307309
}
308310

309311
@Override
310312
public void onCompleted() {
311-
logger.log(ChannelLogLevel.DEBUG, "routeLookup call completed");
312313
throttler.registerBackendResponse(false);
313314
}
314315
});
@@ -323,13 +324,10 @@ public void onCompleted() {
323324
*/
324325
@CheckReturnValue
325326
final CachedRouteLookupResponse get(final RouteLookupRequest request) {
326-
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock to get cached entry");
327327
synchronized (lock) {
328-
logger.log(ChannelLogLevel.DEBUG, "Acquired lock to get cached entry");
329328
final CacheEntry cacheEntry;
330329
cacheEntry = linkedHashLruCache.read(request);
331330
if (cacheEntry == null) {
332-
logger.log(ChannelLogLevel.DEBUG, "No cache entry found, making a new RLS request");
333331
PendingCacheEntry pendingEntry = pendingCallCache.get(request);
334332
if (pendingEntry != null) {
335333
return CachedRouteLookupResponse.pendingResponse(pendingEntry);
@@ -339,15 +337,12 @@ final CachedRouteLookupResponse get(final RouteLookupRequest request) {
339337

340338
if (cacheEntry instanceof DataCacheEntry) {
341339
// cache hit, initiate async-refresh if entry is staled
342-
logger.log(ChannelLogLevel.DEBUG, "Cache hit for the request");
343340
DataCacheEntry dataEntry = ((DataCacheEntry) cacheEntry);
344341
if (dataEntry.isStaled(ticker.read())) {
345-
logger.log(ChannelLogLevel.DEBUG, "Cache entry is stale");
346342
dataEntry.maybeRefresh();
347343
}
348344
return CachedRouteLookupResponse.dataEntry((DataCacheEntry) cacheEntry);
349345
}
350-
logger.log(ChannelLogLevel.DEBUG, "Cache hit for a backup entry");
351346
return CachedRouteLookupResponse.backoffEntry((BackoffCacheEntry) cacheEntry);
352347
}
353348
}
@@ -409,8 +404,8 @@ private DataCacheEntry createDataEntry(
409404
RouteLookupRequest request, RouteLookupResponse routeLookupResponse) {
410405
logger.log(
411406
ChannelLogLevel.DEBUG,
412-
"Transition to data cache: routeLookupResponse={0}",
413-
routeLookupResponse);
407+
"[RLS Entry {0}] Transition to data cache: routeLookupResponse={1}",
408+
request, routeLookupResponse);
414409
DataCacheEntry entry = new DataCacheEntry(request, routeLookupResponse);
415410
// Constructor for DataCacheEntry causes updateBalancingState, but the picks can't happen until
416411
// this cache update because the lock is held
@@ -421,18 +416,19 @@ private DataCacheEntry createDataEntry(
421416
@GuardedBy("lock")
422417
private BackoffCacheEntry createBackOffEntry(
423418
RouteLookupRequest request, Status status, @Nullable BackoffPolicy backoffPolicy) {
424-
logger.log(ChannelLogLevel.DEBUG, "Transition to back off: status={0}", status);
425419
if (backoffPolicy == null) {
426420
backoffPolicy = backoffProvider.get();
427421
}
428422
long delayNanos = backoffPolicy.nextBackoffNanos();
423+
logger.log(
424+
ChannelLogLevel.DEBUG,
425+
"[RLS Entry {0}] Transition to back off: status={1}, delayNanos={2}",
426+
request, status, delayNanos);
429427
BackoffCacheEntry entry = new BackoffCacheEntry(request, status, backoffPolicy);
430428
// Lock is held, so the task can't execute before the assignment
431429
entry.scheduledFuture = scheduledExecutorService.schedule(
432430
() -> refreshBackoffEntry(entry), delayNanos, TimeUnit.NANOSECONDS);
433431
linkedHashLruCache.cacheAndClean(request, entry);
434-
logger.log(ChannelLogLevel.DEBUG, "BackoffCacheEntry created with a delay of {0} nanos",
435-
delayNanos);
436432
return entry;
437433
}
438434

@@ -443,7 +439,8 @@ private void refreshBackoffEntry(BackoffCacheEntry entry) {
443439
// Future was previously cancelled
444440
return;
445441
}
446-
logger.log(ChannelLogLevel.DEBUG, "Calling RLS for transition to pending");
442+
logger.log(ChannelLogLevel.DEBUG,
443+
"[RLS Entry {0}] Calling RLS for transition to pending", entry.request);
447444
linkedHashLruCache.invalidate(entry.request);
448445
asyncRlsCall(entry.request, entry.backoffPolicy);
449446
}
@@ -659,10 +656,10 @@ void maybeRefresh() {
659656
synchronized (lock) { // Lock is already held, but ErrorProne can't tell
660657
if (pendingCallCache.containsKey(request)) {
661658
// pending already requested
662-
logger.log(ChannelLogLevel.DEBUG,
663-
"A pending refresh request already created, no need to proceed with refresh");
664659
return;
665660
}
661+
logger.log(ChannelLogLevel.DEBUG,
662+
"[RLS Entry {0}] Cache entry is stale, refreshing", request);
666663
asyncRlsCall(request, /* backoffPolicy= */ null);
667664
}
668665
}
@@ -943,13 +940,10 @@ private final class BackoffRefreshListener implements ChildLbStatusListener {
943940

944941
@Override
945942
public void onStatusChanged(ConnectivityState newState) {
946-
logger.log(ChannelLogLevel.DEBUG, "LB status changed to: {0}", newState);
947943
if (prevState == ConnectivityState.TRANSIENT_FAILURE
948944
&& newState == ConnectivityState.READY) {
949945
logger.log(ChannelLogLevel.DEBUG, "Transitioning from TRANSIENT_FAILURE to READY");
950-
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock force refresh backoff cache entries");
951946
synchronized (lock) {
952-
logger.log(ChannelLogLevel.DEBUG, "Lock acquired for refreshing backoff cache entries");
953947
for (CacheEntry value : linkedHashLruCache.values()) {
954948
if (value instanceof BackoffCacheEntry) {
955949
refreshBackoffEntry((BackoffCacheEntry) value);
@@ -983,31 +977,22 @@ public PickResult pickSubchannel(PickSubchannelArgs args) {
983977
RouteLookupRequest request =
984978
requestFactory.create(serviceName, methodName, args.getHeaders());
985979
final CachedRouteLookupResponse response = CachingRlsLbClient.this.get(request);
986-
logger.log(ChannelLogLevel.DEBUG,
987-
"Got route lookup cache entry for service={0}, method={1}, headers={2}:\n {3}",
988-
new Object[]{serviceName, methodName, args.getHeaders(), response});
989980

990981
if (response.getHeaderData() != null && !response.getHeaderData().isEmpty()) {
991-
logger.log(ChannelLogLevel.DEBUG, "Updating RLS metadata from the RLS response headers");
992982
Metadata headers = args.getHeaders();
993983
headers.discardAll(RLS_DATA_KEY);
994984
headers.put(RLS_DATA_KEY, response.getHeaderData());
995985
}
996986
String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget();
997-
logger.log(ChannelLogLevel.DEBUG, "defaultTarget = {0}", defaultTarget);
998987
boolean hasFallback = defaultTarget != null && !defaultTarget.isEmpty();
999988
if (response.hasData()) {
1000-
logger.log(ChannelLogLevel.DEBUG, "RLS response has data, proceed with selecting a picker");
1001989
ChildPolicyWrapper childPolicyWrapper = response.getChildPolicyWrapper();
1002990
SubchannelPicker picker =
1003991
(childPolicyWrapper != null) ? childPolicyWrapper.getPicker() : null;
1004992
if (picker == null) {
1005-
logger.log(ChannelLogLevel.DEBUG,
1006-
"Child policy wrapper didn't return a picker, returning PickResult with no results");
1007993
return PickResult.withNoResult();
1008994
}
1009995
// Happy path
1010-
logger.log(ChannelLogLevel.DEBUG, "Returning PickResult");
1011996
PickResult pickResult = picker.pickSubchannel(args);
1012997
if (pickResult.hasResult()) {
1013998
helper.getMetricRecorder().addLongCounter(TARGET_PICKS_COUNTER, 1,
@@ -1017,20 +1002,15 @@ public PickResult pickSubchannel(PickSubchannelArgs args) {
10171002
}
10181003
return pickResult;
10191004
} else if (response.hasError()) {
1020-
logger.log(ChannelLogLevel.DEBUG, "RLS response has errors");
10211005
if (hasFallback) {
1022-
logger.log(ChannelLogLevel.DEBUG, "Using RLS fallback");
10231006
return useFallback(args);
10241007
}
1025-
logger.log(ChannelLogLevel.DEBUG, "No RLS fallback, returning PickResult with an error");
10261008
helper.getMetricRecorder().addLongCounter(FAILED_PICKS_COUNTER, 1,
10271009
Arrays.asList(helper.getChannelTarget(), lookupService), Collections.emptyList());
10281010
return PickResult.withError(
10291011
convertRlsServerStatus(response.getStatus(),
10301012
lbPolicyConfig.getRouteLookupConfig().lookupService()));
10311013
} else {
1032-
logger.log(ChannelLogLevel.DEBUG,
1033-
"RLS response had no data, return a PickResult with no data");
10341014
return PickResult.withNoResult();
10351015
}
10361016
}
@@ -1067,21 +1047,18 @@ private String determineMetricsPickResult(PickResult pickResult) {
10671047

10681048
private void startFallbackChildPolicy() {
10691049
String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget();
1070-
logger.log(ChannelLogLevel.DEBUG, "starting fallback to {0}", defaultTarget);
1071-
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock to start fallback child policy");
10721050
synchronized (lock) {
1073-
logger.log(ChannelLogLevel.DEBUG, "Acquired lock for starting fallback child policy");
10741051
if (fallbackChildPolicyWrapper != null) {
10751052
return;
10761053
}
1054+
logger.log(ChannelLogLevel.DEBUG, "starting fallback to {0}", defaultTarget);
10771055
fallbackChildPolicyWrapper = refCountedChildPolicyWrapperFactory.createOrGet(defaultTarget);
10781056
}
10791057
}
10801058

10811059
// GuardedBy CachingRlsLbClient.lock
10821060
void close() {
10831061
synchronized (lock) { // Lock is already held, but ErrorProne can't tell
1084-
logger.log(ChannelLogLevel.DEBUG, "Closing RLS picker");
10851062
if (fallbackChildPolicyWrapper != null) {
10861063
refCountedChildPolicyWrapperFactory.release(fallbackChildPolicyWrapper);
10871064
}

‎rls/src/main/java/io/grpc/rls/RlsLoadBalancer.java

+1-9
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,11 @@ final class RlsLoadBalancer extends LoadBalancer {
5050

5151
@Override
5252
public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
53-
logger.log(ChannelLogLevel.DEBUG, "Received resolution result: {0}", resolvedAddresses);
5453
LbPolicyConfiguration lbPolicyConfiguration =
5554
(LbPolicyConfiguration) resolvedAddresses.getLoadBalancingPolicyConfig();
5655
checkNotNull(lbPolicyConfiguration, "Missing RLS LB config");
5756
if (!lbPolicyConfiguration.equals(this.lbPolicyConfiguration)) {
58-
logger.log(ChannelLogLevel.DEBUG, "A new RLS LB config received");
57+
logger.log(ChannelLogLevel.DEBUG, "A new RLS LB config received: {0}", lbPolicyConfiguration);
5958
boolean needToConnect = this.lbPolicyConfiguration == null
6059
|| !this.lbPolicyConfiguration.getRouteLookupConfig().lookupService().equals(
6160
lbPolicyConfiguration.getRouteLookupConfig().lookupService());
@@ -80,22 +79,18 @@ public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
8079
// not required.
8180
this.lbPolicyConfiguration = lbPolicyConfiguration;
8281
}
83-
logger.log(ChannelLogLevel.DEBUG, "RLS LB accepted resolved addresses successfully");
8482
return Status.OK;
8583
}
8684

8785
@Override
8886
public void requestConnection() {
89-
logger.log(ChannelLogLevel.DEBUG, "connection requested from RLS LB");
9087
if (routeLookupClient != null) {
91-
logger.log(ChannelLogLevel.DEBUG, "requesting a connection from the routeLookupClient");
9288
routeLookupClient.requestConnection();
9389
}
9490
}
9591

9692
@Override
9793
public void handleNameResolutionError(final Status error) {
98-
logger.log(ChannelLogLevel.DEBUG, "Received resolution error: {0}", error);
9994
class ErrorPicker extends SubchannelPicker {
10095
@Override
10196
public PickResult pickSubchannel(PickSubchannelArgs args) {
@@ -116,14 +111,11 @@ public String toString() {
116111
routeLookupClient = null;
117112
lbPolicyConfiguration = null;
118113
}
119-
logger.log(ChannelLogLevel.DEBUG,
120-
"Updating balancing state to TRANSIENT_FAILURE with an error picker");
121114
helper.updateBalancingState(ConnectivityState.TRANSIENT_FAILURE, new ErrorPicker());
122115
}
123116

124117
@Override
125118
public void shutdown() {
126-
logger.log(ChannelLogLevel.DEBUG, "Rls lb shutdown");
127119
if (routeLookupClient != null) {
128120
logger.log(ChannelLogLevel.DEBUG, "closing the routeLookupClient because of RLS LB shutdown");
129121
routeLookupClient.close();

0 commit comments

Comments
 (0)
Please sign in to comment.