Skip to content

Commit

Permalink
stats/opencensus: Fix flaky test span (#6296)
Browse files Browse the repository at this point in the history
  • Loading branch information
zasweq committed May 25, 2023
1 parent f19266c commit 157db19
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 55 deletions.
137 changes: 84 additions & 53 deletions stats/opencensus/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,25 @@ func (fe *fakeExporter) ExportSpan(sd *trace.SpanData) {
fe.seenSpans = append(fe.seenSpans, gotSI)
}

// waitForServerSpan waits until a server span appears somewhere in the span
// list in an exporter. Returns an error if no server span found within the
// passed context's timeout.
func waitForServerSpan(ctx context.Context, fe *fakeExporter) error {
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
fe.mu.Lock()
for _, seenSpan := range fe.seenSpans {
if seenSpan.spanKind == trace.SpanKindServer {
fe.mu.Unlock()
return nil
}
}
fe.mu.Unlock()
}
return fmt.Errorf("timeout when waiting for server span to be present in exporter")
}

// TestSpan tests emitted spans from gRPC. It configures a system with a gRPC
// Client and gRPC server with the OpenCensus Dial and Server Option configured,
// and makes a Unary RPC and a Streaming RPC. This should cause spans with
Expand Down Expand Up @@ -1375,18 +1394,30 @@ func (s) TestSpan(t *testing.T) {

// Make a Unary RPC. This should cause a span with message events
// corresponding to the request message and response message to be emitted
// both from the client and the server. Note that RPCs trigger exports of
// corresponding span data synchronously, thus the Span Data is guaranteed
// to have been read by exporter and is ready to make assertions on.
// both from the client and the server.
if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{}}); err != nil {
t.Fatalf("Unexpected error from UnaryCall: %v", err)
}

// The spans received are server first, then client. This is due to the RPC
// finishing on the server first. The ordering of message events for a Unary
// Call is as follows: (client send, server recv), (server send (server span
// end), client recv (client span end)).
wantSI := []spanInformation{
{
sc: trace.SpanContext{
TraceOptions: 1,
},
name: "Attempt.grpc.testing.TestService.UnaryCall",
messageEvents: []trace.MessageEvent{
{
EventType: trace.MessageEventTypeSent,
MessageID: 1, // First msg send so 1 (see comment above)
UncompressedByteSize: 2,
CompressedByteSize: 2,
},
{
EventType: trace.MessageEventTypeRecv,
MessageID: 1, // First msg recv so 1 (see comment above)
},
},
hasRemoteParent: false,
},
{
// Sampling rate of 100 percent, so this should populate every span
// with the information that this span is being sampled. Here and
Expand Down Expand Up @@ -1424,25 +1455,6 @@ func (s) TestSpan(t *testing.T) {
// instrumentation code, so I'm iffy on it but fine.
hasRemoteParent: true,
},
{
sc: trace.SpanContext{
TraceOptions: 1,
},
name: "Attempt.grpc.testing.TestService.UnaryCall",
messageEvents: []trace.MessageEvent{
{
EventType: trace.MessageEventTypeSent,
MessageID: 1, // First msg send so 1 (see comment above)
UncompressedByteSize: 2,
CompressedByteSize: 2,
},
{
EventType: trace.MessageEventTypeRecv,
MessageID: 1, // First msg recv so 1 (see comment above)
},
},
hasRemoteParent: false,
},
{
sc: trace.SpanContext{
TraceOptions: 1,
Expand All @@ -1453,19 +1465,32 @@ func (s) TestSpan(t *testing.T) {
childSpanCount: 1,
},
}
if err := waitForServerSpan(ctx, fe); err != nil {
t.Fatal(err)
}
var spanInfoSort = func(i, j int) bool {
// This will order into attempt span (which has an unset span kind to
// not prepend Sent. to span names in backends), then call span, then
// server span.
return fe.seenSpans[i].spanKind < fe.seenSpans[j].spanKind
}
fe.mu.Lock()
// Sort the underlying seen Spans for cmp.Diff assertions and ID
// relationship assertions.
sort.Slice(fe.seenSpans, spanInfoSort)
if diff := cmp.Diff(fe.seenSpans, wantSI); diff != "" {
fe.mu.Unlock()
t.Fatalf("got unexpected spans, diff (-got, +want): %v", diff)
}
fe.mu.Lock()
if err := validateTraceAndSpanIDs(fe.seenSpans); err != nil {
fe.mu.Unlock()
t.Fatalf("Error in runtime data assertions: %v", err)
}
if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[1].sc.SpanID) {
t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[1].sc.SpanID)
if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) {
t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID)
}
if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[2].sc.SpanID) {
t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[2].sc.SpanID)
if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) {
t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID)
}

fe.seenSpans = nil
Expand All @@ -1490,6 +1515,23 @@ func (s) TestSpan(t *testing.T) {
}

wantSI = []spanInformation{
{
sc: trace.SpanContext{
TraceOptions: 1,
},
name: "Attempt.grpc.testing.TestService.FullDuplexCall",
messageEvents: []trace.MessageEvent{
{
EventType: trace.MessageEventTypeSent,
MessageID: 1, // First msg send so 1
},
{
EventType: trace.MessageEventTypeSent,
MessageID: 2, // Second msg send so 2
},
},
hasRemoteParent: false,
},
{
sc: trace.SpanContext{
TraceOptions: 1,
Expand Down Expand Up @@ -1522,36 +1564,25 @@ func (s) TestSpan(t *testing.T) {
hasRemoteParent: false,
childSpanCount: 1,
},
{
sc: trace.SpanContext{
TraceOptions: 1,
},
name: "Attempt.grpc.testing.TestService.FullDuplexCall",
messageEvents: []trace.MessageEvent{
{
EventType: trace.MessageEventTypeSent,
MessageID: 1, // First msg send so 1
},
{
EventType: trace.MessageEventTypeSent,
MessageID: 2, // Second msg send so 2
},
},
hasRemoteParent: false,
},
}
if err := waitForServerSpan(ctx, fe); err != nil {
t.Fatal(err)
}
fe.mu.Lock()
defer fe.mu.Unlock()
// Sort the underlying seen Spans for cmp.Diff assertions and ID
// relationship assertions.
sort.Slice(fe.seenSpans, spanInfoSort)
if diff := cmp.Diff(fe.seenSpans, wantSI); diff != "" {
t.Fatalf("got unexpected spans, diff (-got, +want): %v", diff)
}
if err := validateTraceAndSpanIDs(fe.seenSpans); err != nil {
t.Fatalf("Error in runtime data assertions: %v", err)
}
if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) {
t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID)
if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) {
t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID)
}
if !cmp.Equal(fe.seenSpans[2].parentSpanID, fe.seenSpans[1].sc.SpanID) {
t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[2].parentSpanID, fe.seenSpans[1].sc.SpanID)
if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) {
t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID)
}
}
5 changes: 3 additions & 2 deletions stats/opencensus/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ type traceInfo struct {
func (csh *clientStatsHandler) traceTagRPC(ctx context.Context, rti *stats.RPCTagInfo) (context.Context, *traceInfo) {
// TODO: get consensus on whether this method name of "s.m" is correct.
mn := "Attempt." + strings.Replace(removeLeadingSlash(rti.FullMethodName), "/", ".", -1)
// Returned context is ignored because will populate context with data
// that wraps the span instead.
// Returned context is ignored because will populate context with data that
// wraps the span instead. Don't set span kind client on this attempt span
// to prevent backend from prepending span name with "Sent.".
_, span := trace.StartSpan(ctx, mn, trace.WithSampler(csh.to.TS))

tcBin := propagation.Binary(span.SpanContext())
Expand Down

0 comments on commit 157db19

Please sign in to comment.