Skip to content

Commit 9796d57

Browse files
authoredNov 13, 2024··
fix: simplify remaining deadline metric impl (#2410)
* fix: clean up remaining deadline metric * make naming consistent with gax * add a comment and skip record 0 * add comment * update calculation and test * fix test
1 parent a11d56f commit 9796d57

File tree

7 files changed

+50
-94
lines changed

7 files changed

+50
-94
lines changed
 

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/EnhancedBigtableStub.java

+13-44
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@
105105
import com.google.cloud.bigtable.data.v2.stub.changestream.GenerateInitialChangeStreamPartitionsUserCallable;
106106
import com.google.cloud.bigtable.data.v2.stub.changestream.ReadChangeStreamResumptionStrategy;
107107
import com.google.cloud.bigtable.data.v2.stub.changestream.ReadChangeStreamUserCallable;
108-
import com.google.cloud.bigtable.data.v2.stub.metrics.BigtableTracer;
109108
import com.google.cloud.bigtable.data.v2.stub.metrics.BigtableTracerStreamingCallable;
110109
import com.google.cloud.bigtable.data.v2.stub.metrics.BigtableTracerUnaryCallable;
111110
import com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsTracerFactory;
@@ -549,9 +548,7 @@ public <RowT> ServerStreamingCallable<Query, RowT> createReadRowsCallable(
549548
return traced.withDefaultCallContext(
550549
clientContext
551550
.getDefaultCallContext()
552-
.withOption(
553-
BigtableTracer.OPERATION_TIMEOUT_KEY,
554-
settings.readRowsSettings().getRetrySettings().getTotalTimeout()));
551+
.withRetrySettings(settings.readRowsSettings().getRetrySettings()));
555552
}
556553

557554
/**
@@ -588,9 +585,7 @@ public <RowT> UnaryCallable<Query, RowT> createReadRowCallable(RowAdapter<RowT>
588585
return traced.withDefaultCallContext(
589586
clientContext
590587
.getDefaultCallContext()
591-
.withOption(
592-
BigtableTracer.OPERATION_TIMEOUT_KEY,
593-
settings.readRowSettings().getRetrySettings().getTotalTimeout()));
588+
.withRetrySettings(settings.readRowSettings().getRetrySettings()));
594589
} else {
595590
ServerStreamingCallable<ReadRowsRequest, RowT> readRowsCallable =
596591
createReadRowsBaseCallable(
@@ -612,9 +607,7 @@ public <RowT> UnaryCallable<Query, RowT> createReadRowCallable(RowAdapter<RowT>
612607
readRowCallable,
613608
clientContext
614609
.getDefaultCallContext()
615-
.withOption(
616-
BigtableTracer.OPERATION_TIMEOUT_KEY,
617-
settings.readRowSettings().getRetrySettings().getTotalTimeout()),
610+
.withRetrySettings(settings.readRowSettings().getRetrySettings()),
618611
clientContext.getTracerFactory(),
619612
getSpanName("ReadRow"),
620613
/*allowNoResponses=*/ true);
@@ -733,9 +726,7 @@ private <RowT> UnaryCallable<Query, List<RowT>> createBulkReadRowsCallable(
733726
return traced.withDefaultCallContext(
734727
clientContext
735728
.getDefaultCallContext()
736-
.withOption(
737-
BigtableTracer.OPERATION_TIMEOUT_KEY,
738-
settings.bulkReadRowsSettings().getRetrySettings().getTotalTimeout()));
729+
.withRetrySettings(settings.readRowsSettings().getRetrySettings()));
739730
}
740731

741732
/**
@@ -805,9 +796,7 @@ public ApiFuture<List<KeyOffset>> futureCall(String s, ApiCallContext apiCallCon
805796
.withDefaultCallContext(
806797
clientContext
807798
.getDefaultCallContext()
808-
.withOption(
809-
BigtableTracer.OPERATION_TIMEOUT_KEY,
810-
settings.sampleRowKeysSettings().getRetrySettings().getTotalTimeout())));
799+
.withRetrySettings(settings.sampleRowKeysSettings().getRetrySettings())));
811800
}
812801

813802
/**
@@ -933,9 +922,7 @@ private UnaryCallable<BulkMutation, MutateRowsAttemptResult> createMutateRowsBas
933922
return traced.withDefaultCallContext(
934923
clientContext
935924
.getDefaultCallContext()
936-
.withOption(
937-
BigtableTracer.OPERATION_TIMEOUT_KEY,
938-
settings.bulkMutateRowsSettings().getRetrySettings().getTotalTimeout()));
925+
.withRetrySettings(settings.bulkMutateRowsSettings().getRetrySettings()));
939926
}
940927

941928
/**
@@ -1143,12 +1130,8 @@ private UnaryCallable<ReadModifyWriteRow, Row> createReadModifyWriteRowCallable(
11431130
return traced.withDefaultCallContext(
11441131
clientContext
11451132
.getDefaultCallContext()
1146-
.withOption(
1147-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1148-
settings
1149-
.generateInitialChangeStreamPartitionsSettings()
1150-
.getRetrySettings()
1151-
.getTotalTimeout()));
1133+
.withRetrySettings(
1134+
settings.generateInitialChangeStreamPartitionsSettings().getRetrySettings()));
11521135
}
11531136

11541137
/**
@@ -1223,9 +1206,7 @@ private UnaryCallable<ReadModifyWriteRow, Row> createReadModifyWriteRowCallable(
12231206
return traced.withDefaultCallContext(
12241207
clientContext
12251208
.getDefaultCallContext()
1226-
.withOption(
1227-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1228-
settings.readChangeStreamSettings().getRetrySettings().getTotalTimeout()));
1209+
.withRetrySettings(settings.readChangeStreamSettings().getRetrySettings()));
12291210
}
12301211

12311212
/**
@@ -1314,9 +1295,7 @@ public Map<String, String> extract(ExecuteQueryRequest executeQueryRequest) {
13141295
traced.withDefaultCallContext(
13151296
clientContext
13161297
.getDefaultCallContext()
1317-
.withOption(
1318-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1319-
settings.executeQuerySettings().getRetrySettings().getTotalTimeout())),
1298+
.withRetrySettings(settings.executeQuerySettings().getRetrySettings())),
13201299
requestContext);
13211300
}
13221301

@@ -1396,11 +1375,7 @@ public ApiFuture<RespT> futureCall(ReqT reqT, ApiCallContext apiCallContext) {
13961375
getSpanName(methodDescriptor.getBareMethodName()));
13971376

13981377
return traced.withDefaultCallContext(
1399-
clientContext
1400-
.getDefaultCallContext()
1401-
.withOption(
1402-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1403-
callSettings.getRetrySettings().getTotalTimeout()));
1378+
clientContext.getDefaultCallContext().withRetrySettings(callSettings.getRetrySettings()));
14041379
}
14051380

14061381
private <BaseReqT, BaseRespT, ReqT, RespT> UnaryCallable<ReqT, RespT> createUnaryCallableNew(
@@ -1429,11 +1404,7 @@ private <BaseReqT, BaseRespT, ReqT, RespT> UnaryCallable<ReqT, RespT> createUnar
14291404

14301405
return new BigtableUnaryOperationCallable<>(
14311406
transformed,
1432-
clientContext
1433-
.getDefaultCallContext()
1434-
.withOption(
1435-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1436-
callSettings.getRetrySettings().getTotalTimeout()),
1407+
clientContext.getDefaultCallContext().withRetrySettings(callSettings.getRetrySettings()),
14371408
clientContext.getTracerFactory(),
14381409
getSpanName(methodDescriptor.getBareMethodName()),
14391410
/* allowNoResponse= */ false);
@@ -1470,9 +1441,7 @@ public Map<String, String> extract(PingAndWarmRequest request) {
14701441
return pingAndWarm.withDefaultCallContext(
14711442
clientContext
14721443
.getDefaultCallContext()
1473-
.withOption(
1474-
BigtableTracer.OPERATION_TIMEOUT_KEY,
1475-
settings.pingAndWarmSettings().getRetrySettings().getTotalTimeout()));
1444+
.withRetrySettings(settings.pingAndWarmSettings().getRetrySettings()));
14761445
}
14771446

14781447
private <RequestT, ResponseT> UnaryCallable<RequestT, ResponseT> withRetries(

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracer.java

+5-9
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import com.google.api.core.BetaApi;
19-
import com.google.api.core.InternalApi;
2019
import com.google.api.gax.rpc.ApiCallContext;
2120
import com.google.api.gax.tracing.ApiTracer;
2221
import com.google.api.gax.tracing.BaseApiTracer;
22+
import java.time.Duration;
2323
import javax.annotation.Nullable;
24-
import org.threeten.bp.Duration;
2524

2625
/**
2726
* A Bigtable specific {@link ApiTracer} that includes additional contexts. This class is a base
@@ -32,10 +31,6 @@ public class BigtableTracer extends BaseApiTracer {
3231

3332
private volatile int attempt = 0;
3433

35-
@InternalApi("for internal use only")
36-
public static final ApiCallContext.Key<Duration> OPERATION_TIMEOUT_KEY =
37-
ApiCallContext.Key.create("OPERATION_TIMEOUT");
38-
3934
@Override
4035
public void attemptStarted(int attemptNumber) {
4136
this.attempt = attemptNumber;
@@ -108,10 +103,11 @@ public void grpcMessageSent() {
108103
}
109104

110105
/**
111-
* Record the operation timeout from user settings for calculating remaining deadline. This will
112-
* be called in BuiltinMetricsTracer.
106+
* Record the operation timeout from user settings for calculating remaining deadline. Currently,
107+
* it's called in BuiltinMetricsTracer on attempt start from {@link BigtableTracerUnaryCallable}
108+
* and {@link BigtableTracerStreamingCallable}.
113109
*/
114-
public void setOperationTimeout(Duration operationTimeout) {
110+
public void setTotalTimeoutDuration(Duration totalTimeoutDuration) {
115111
// noop
116112
}
117113
}

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerStreamingCallable.java

+4-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import com.google.api.core.InternalApi;
19-
import com.google.api.gax.grpc.GrpcCallContext;
2019
import com.google.api.gax.grpc.GrpcResponseMetadata;
2120
import com.google.api.gax.rpc.ApiCallContext;
2221
import com.google.api.gax.rpc.ResponseObserver;
@@ -27,7 +26,6 @@
2726
import com.google.common.base.Stopwatch;
2827
import java.util.concurrent.TimeUnit;
2928
import javax.annotation.Nonnull;
30-
import org.threeten.bp.Duration;
3129

3230
/**
3331
* This callable will
@@ -61,13 +59,11 @@ public void call(
6159
final GrpcResponseMetadata responseMetadata = new GrpcResponseMetadata();
6260
// tracer should always be an instance of bigtable tracer
6361
if (context.getTracer() instanceof BigtableTracer) {
62+
BigtableTracer tracer = (BigtableTracer) context.getTracer();
6463
BigtableTracerResponseObserver<ResponseT> innerObserver =
65-
new BigtableTracerResponseObserver<>(
66-
responseObserver, (BigtableTracer) context.getTracer(), responseMetadata);
67-
GrpcCallContext callContext = (GrpcCallContext) context;
68-
Duration deadline = callContext.getOption(BigtableTracer.OPERATION_TIMEOUT_KEY);
69-
if (deadline != null) {
70-
((BigtableTracer) context.getTracer()).setOperationTimeout(deadline);
64+
new BigtableTracerResponseObserver<>(responseObserver, tracer, responseMetadata);
65+
if (context.getRetrySettings() != null) {
66+
tracer.setTotalTimeoutDuration(context.getRetrySettings().getTotalTimeoutDuration());
7167
}
7268
innerCallable.call(
7369
request,

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerUnaryCallable.java

+3-6
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919
import com.google.api.core.ApiFutureCallback;
2020
import com.google.api.core.ApiFutures;
2121
import com.google.api.core.InternalApi;
22-
import com.google.api.gax.grpc.GrpcCallContext;
2322
import com.google.api.gax.grpc.GrpcResponseMetadata;
2423
import com.google.api.gax.rpc.ApiCallContext;
2524
import com.google.api.gax.rpc.UnaryCallable;
2625
import com.google.common.base.Preconditions;
2726
import com.google.common.util.concurrent.MoreExecutors;
2827
import javax.annotation.Nonnull;
29-
import org.threeten.bp.Duration;
3028

3129
/**
3230
* This callable will:
@@ -56,14 +54,13 @@ public BigtableTracerUnaryCallable(@Nonnull UnaryCallable<RequestT, ResponseT> i
5654
public ApiFuture<ResponseT> futureCall(RequestT request, ApiCallContext context) {
5755
// tracer should always be an instance of BigtableTracer
5856
if (context.getTracer() instanceof BigtableTracer) {
57+
BigtableTracer tracer = (BigtableTracer) context.getTracer();
5958
final GrpcResponseMetadata responseMetadata = new GrpcResponseMetadata();
6059
BigtableTracerUnaryCallback<ResponseT> callback =
6160
new BigtableTracerUnaryCallback<ResponseT>(
6261
(BigtableTracer) context.getTracer(), responseMetadata);
63-
GrpcCallContext callContext = (GrpcCallContext) context;
64-
Duration deadline = callContext.getOption(BigtableTracer.OPERATION_TIMEOUT_KEY);
65-
if (deadline != null) {
66-
((BigtableTracer) context.getTracer()).setOperationTimeout(deadline);
62+
if (context.getRetrySettings() != null) {
63+
tracer.setTotalTimeoutDuration(context.getRetrySettings().getTotalTimeoutDuration());
6764
}
6865
ApiFuture<ResponseT> future =
6966
innerCallable.futureCall(

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java

+19-22
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import com.google.cloud.bigtable.Version;
3030
import com.google.common.base.Stopwatch;
3131
import com.google.common.math.IntMath;
32+
import io.grpc.Deadline;
3233
import io.opentelemetry.api.common.Attributes;
3334
import io.opentelemetry.api.metrics.DoubleHistogram;
3435
import io.opentelemetry.api.metrics.LongCounter;
@@ -37,7 +38,6 @@
3738
import java.util.concurrent.atomic.AtomicBoolean;
3839
import java.util.concurrent.atomic.AtomicInteger;
3940
import java.util.concurrent.atomic.AtomicLong;
40-
import java.util.logging.Level;
4141
import java.util.logging.Logger;
4242
import javax.annotation.Nullable;
4343
import org.threeten.bp.Duration;
@@ -90,8 +90,8 @@ class BuiltinMetricsTracer extends BigtableTracer {
9090
private Long serverLatencies = null;
9191
private final AtomicLong grpcMessageSentDelay = new AtomicLong(0);
9292

93-
private Duration operationTimeout = Duration.ofMillis(0);
94-
private long remainingOperationTimeout = 0;
93+
private Deadline operationDeadline = null;
94+
private volatile long remainingDeadlineAtAttemptStart = 0;
9595

9696
// OpenCensus (and server) histogram buckets use [start, end), however OpenTelemetry uses (start,
9797
// end]. To work around this, we measure all the latencies in nanoseconds and convert them
@@ -175,6 +175,9 @@ public void attemptStarted(Object request, int attemptNumber) {
175175
this.attempt = attemptNumber;
176176
attemptCount++;
177177
attemptTimer = Stopwatch.createStarted();
178+
if (operationDeadline != null) {
179+
remainingDeadlineAtAttemptStart = operationDeadline.timeRemaining(TimeUnit.MILLISECONDS);
180+
}
178181
if (request != null) {
179182
this.tableId = Util.extractTableId(request);
180183
}
@@ -185,11 +188,6 @@ public void attemptStarted(Object request, int attemptNumber) {
185188
}
186189
}
187190
}
188-
// OperationTimeout is only set after the first attempt.
189-
if (attemptCount > 1) {
190-
remainingOperationTimeout =
191-
operationTimeout.toMillis() - operationTimer.elapsed(TimeUnit.MILLISECONDS);
192-
}
193191
}
194192

195193
@Override
@@ -301,12 +299,16 @@ public void grpcMessageSent() {
301299
grpcMessageSentDelay.set(attemptTimer.elapsed(TimeUnit.NANOSECONDS));
302300
}
303301

304-
/*
305-
This is called by BigtableTracerCallables that sets operation timeout from user settings.
306-
*/
307302
@Override
308-
public void setOperationTimeout(Duration operationTimeout) {
309-
this.operationTimeout = operationTimeout;
303+
public void setTotalTimeoutDuration(java.time.Duration totalTimeoutDuration) {
304+
// This method is called by BigtableTracerStreamingCallable and
305+
// BigtableTracerUnaryCallable which is called per attempt. We only set
306+
// the operationDeadline on the first attempt and when totalTimeout is set.
307+
if (operationDeadline == null && !totalTimeoutDuration.isZero()) {
308+
this.operationDeadline =
309+
Deadline.after(totalTimeoutDuration.toMillis(), TimeUnit.MILLISECONDS);
310+
this.remainingDeadlineAtAttemptStart = totalTimeoutDuration.toMillis();
311+
}
310312
}
311313

312314
@Override
@@ -403,15 +405,10 @@ private void recordAttemptCompletion(@Nullable Throwable status) {
403405
attemptLatenciesHistogram.record(
404406
convertToMs(attemptTimer.elapsed(TimeUnit.NANOSECONDS)), attributes);
405407

406-
if (attemptCount <= 1) {
407-
remainingDeadlineHistogram.record(operationTimeout.toMillis(), attributes);
408-
} else if (remainingOperationTimeout >= 0) {
409-
remainingDeadlineHistogram.record(remainingOperationTimeout, attributes);
410-
} else if (operationTimeout.toMillis() != 0) {
411-
// If the operationTimeout is set but remaining deadline is < 0, log a warning. This should
412-
// never happen.
413-
logger.log(
414-
Level.WARNING, "The remaining deadline was less than 0: " + remainingOperationTimeout);
408+
// When operationDeadline is set, it's possible that the deadline is passed by the time we send
409+
// a new attempt. In this case we'll record 0.
410+
if (operationDeadline != null) {
411+
remainingDeadlineHistogram.record(Math.max(0, remainingDeadlineAtAttemptStart), attributes);
415412
}
416413

417414
if (serverLatencies != null) {

‎google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/CompositeTracer.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,9 @@ public void grpcMessageSent() {
241241
}
242242

243243
@Override
244-
public void setOperationTimeout(Duration operationTimeout) {
244+
public void setTotalTimeoutDuration(java.time.Duration totalTimeoutDuration) {
245245
for (BigtableTracer tracer : bigtableTracers) {
246-
tracer.setOperationTimeout(operationTimeout);
246+
tracer.setTotalTimeoutDuration(totalTimeoutDuration);
247247
}
248248
}
249249
}

‎google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerTest.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ public class BuiltinMetricsTracerTest {
134134
private static final long APPLICATION_LATENCY = 200;
135135
private static final long SLEEP_VARIABILITY = 15;
136136
private static final String CLIENT_NAME = "java-bigtable/" + Version.VERSION;
137-
138137
private static final long CHANNEL_BLOCKING_LATENCY = 200;
139138

140139
@Rule public final MockitoRule mockitoRule = MockitoJUnit.rule();
@@ -221,7 +220,7 @@ public void sendHeaders(Metadata headers) {
221220
.readRowsSettings()
222221
.retrySettings()
223222
.setTotalTimeoutDuration(Duration.ofMillis(9000))
224-
.setMaxRpcTimeoutDuration(Duration.ofMillis(6000))
223+
.setMaxRpcTimeoutDuration(Duration.ofMillis(9000))
225224
.setRpcTimeoutMultiplier(1)
226225
.setInitialRpcTimeoutDuration(Duration.ofMillis(6000))
227226
.setInitialRetryDelayDuration(Duration.ofMillis(10))
@@ -809,7 +808,9 @@ public void testRemainingDeadline() {
809808
.get(0);
810809

811810
double okRemainingDeadline = okHistogramPointData.getSum();
812-
assertThat(okRemainingDeadline).isWithin(200).of(8500);
811+
// first attempt latency + retry delay
812+
double expected = 9000 - SERVER_LATENCY - CHANNEL_BLOCKING_LATENCY - 10;
813+
assertThat(okRemainingDeadline).isIn(Range.closed(expected - 500, expected + 10));
813814
}
814815

815816
private static class FakeService extends BigtableGrpc.BigtableImplBase {

0 commit comments

Comments
 (0)
Please sign in to comment.