Skip to content

Commit 5505d04

Browse files
authoredOct 24, 2024··
Fix failure_reason label for nexus_task_execution_failed metric on task timeout (#1684)
1 parent 56b601d commit 5505d04

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed
 

‎internal/internal_nexus_task_handler.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ import (
4545
"go.temporal.io/sdk/log"
4646
)
4747

48+
// errNexusTaskTimeout is returned when the Nexus task handler times out.
49+
// It is used instead of context.DeadlineExceeded to allow the poller to differentiate between Nexus task handler
50+
// timeout and other errors.
51+
var errNexusTaskTimeout = errors.New("nexus task timeout")
52+
4853
func nexusHandlerError(t nexus.HandlerErrorType, message string) *nexuspb.HandlerError {
4954
return &nexuspb.HandlerError{
5055
ErrorType: string(t),
@@ -211,7 +216,7 @@ func (h *nexusTaskHandler) handleStartOperation(
211216
opres, err = h.nexusHandler.StartOperation(ctx, req.GetService(), req.GetOperation(), input, startOptions)
212217
}()
213218
if ctx.Err() != nil {
214-
return nil, nil, ctx.Err()
219+
return nil, nil, errNexusTaskTimeout
215220
}
216221
if err != nil {
217222
var unsuccessfulOperationErr *nexus.UnsuccessfulOperationError
@@ -302,7 +307,7 @@ func (h *nexusTaskHandler) handleCancelOperation(ctx context.Context, nctx *Nexu
302307
err = h.nexusHandler.CancelOperation(ctx, req.GetService(), req.GetOperation(), req.GetOperationId(), cancelOptions)
303308
}()
304309
if ctx.Err() != nil {
305-
return nil, nil, ctx.Err()
310+
return nil, nil, errNexusTaskTimeout
306311
}
307312
if err != nil {
308313
err = convertKnownErrors(err)

‎internal/internal_nexus_task_poller.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,14 @@ func (ntp *nexusTaskPoller) ProcessTask(task interface{}) error {
158158
// Failure from user handler.
159159
// Special case for the start response with operation error.
160160
if err != nil {
161+
var failureTag string
162+
if err == errNexusTaskTimeout {
163+
failureTag = "timeout"
164+
} else {
165+
failureTag = "internal_sdk_error"
166+
}
161167
metricsHandler.
162-
WithTags(metrics.NexusTaskFailureTags("internal_sdk_error")).
168+
WithTags(metrics.NexusTaskFailureTags(failureTag)).
163169
Counter(metrics.NexusTaskExecutionFailedCounter).
164170
Inc(1)
165171
} else if failure != nil {

‎test/nexus_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,9 @@ var syncOp = temporalnexus.NewSyncOperation("sync-op", func(ctx context.Context,
194194
return "", temporal.NewApplicationErrorWithOptions("fake app error for test", "FakeTestError", temporal.ApplicationErrorOptions{
195195
NonRetryable: true,
196196
})
197+
case "timeout":
198+
<-ctx.Done()
199+
return "", ctx.Err()
197200
case "panic":
198201
panic("panic requested")
199202
}
@@ -351,6 +354,23 @@ func TestNexusSyncOperation(t *testing.T) {
351354
tc.requireFailureCounter(t, service.Name, syncOp.Name(), "handler_error_INTERNAL")
352355
}, time.Second*3, time.Millisecond*100)
353356
})
357+
358+
t.Run("timeout", func(t *testing.T) {
359+
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "timeout", nexus.ExecuteOperationOptions{
360+
// Force shorter timeout to speed up the test and get a response back.
361+
Header: nexus.Header{nexus.HeaderRequestTimeout: "300ms"},
362+
})
363+
var handlerErr *nexus.HandlerError
364+
require.ErrorAs(t, err, &handlerErr)
365+
require.Equal(t, nexus.HandlerErrorTypeUpstreamTimeout, handlerErr.Type)
366+
367+
require.EventuallyWithT(t, func(t *assert.CollectT) {
368+
// NOTE metrics.NexusTaskEndToEndLatency isn't recorded on timeouts.
369+
tc.requireTimer(t, metrics.NexusTaskScheduleToStartLatency, service.Name, syncOp.Name())
370+
tc.requireTimer(t, metrics.NexusTaskExecutionLatency, service.Name, syncOp.Name())
371+
tc.requireFailureCounter(t, service.Name, syncOp.Name(), "timeout")
372+
}, time.Second*3, time.Millisecond*100)
373+
})
354374
}
355375

356376
func TestNexusWorkflowRunOperation(t *testing.T) {

0 commit comments

Comments
 (0)
Please sign in to comment.