Skip to content

Commit

Permalink
Add jitter to spread out requests to get poet proof and submit challe…
Browse files Browse the repository at this point in the history
…nge (#4871)

## Motivation
Closes #4860 

## Changes
Add small positive jitter when:
- waiting for a poet round to end (to get proof),
- waiting to build a nipost challenge (to spread out challenge registrations).
Jitter is only added if the round end is in the future to avoid unnecessary wait.

The jitter range is calculated as a percentage of a configured duration (the cycle gap when getting the proof, the grace period when submitting challenges), so it also scales correctly in unit tests and system tests.

### Jitter before getting the proof
The min and max % (0.02% and 0.04%) were chosen so that on mainnet:
- minimum jitter (8.64s) is roughly equal to the time it takes bare-metal poets to generate a proof, which happens after the round ends. This helps avoid situations where many nodes get a 404 response.
- maximum jitter is 2 × min, i.e. roughly 17s (17.28s)

### Jitter before submitting challenges
on mainnet:
- min = 0s
- max = 36s (1% of grace period)

## Test Plan
Added unit tests checking calculating wait and jitter times.
  • Loading branch information
poszu committed Aug 21, 2023
1 parent b03aa76 commit b86540b
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 7 deletions.
19 changes: 16 additions & 3 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,15 @@ func DefaultPoetConfig() PoetConfig {
}
}

const defaultPoetRetryInterval = 5 * time.Second
const (
defaultPoetRetryInterval = 5 * time.Second

// maxNipostChallengeBuildJitter is the upper bound of the random jitter added
// to the wait time before building a nipost challenge (the lower bound is 0).
// It's expressed as % of poet grace period, which translates to a maximum of:
// mainnet (grace period 1h) -> 36s
// systest (grace period 10s) -> 0.1s
maxNipostChallengeBuildJitter = 1.0
)

// Config defines configuration for Builder.
type Config struct {
Expand Down Expand Up @@ -432,8 +440,8 @@ func (b *Builder) buildNIPostChallenge(ctx context.Context) (*types.NIPostChalle
ErrATXChallengeExpired, current, -until)
}
metrics.PublishOntimeWindowLatency.Observe(until.Seconds())
if until > b.poetCfg.GracePeriod {
wait := until - b.poetCfg.GracePeriod
wait := timeToWaitToBuildNipostChallenge(until, b.poetCfg.GracePeriod)
if wait >= 0 {
b.log.WithContext(ctx).With().Debug("waiting for fresh atxs",
log.Duration("till poet round", until),
log.Uint32("current epoch", current.Uint32()),
Expand Down Expand Up @@ -721,3 +729,8 @@ func SignAndFinalizeAtx(signer *signing.EdSigner, atx *types.ActivationTx) error
atx.SmesherID = signer.NodeID()
return atx.Initialize()
}

func timeToWaitToBuildNipostChallenge(untilRoundStart, gracePeriod time.Duration) time.Duration {
jitter := randomDurationInRange(time.Duration(0), gracePeriod*maxNipostChallengeBuildJitter/100.0)
return untilRoundStart + jitter - gracePeriod
}
33 changes: 33 additions & 0 deletions activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1265,3 +1265,36 @@ func TestWaitPositioningAtx(t *testing.T) {
})
}
}

// TestWaitingToBuildNipostChallengeWithJitter checks the bounds of the wait time
// returned by timeToWaitToBuildNipostChallenge for a 1h grace period, for which
// the jitter lies in [0s, 36s].
func TestWaitingToBuildNipostChallengeWithJitter(t *testing.T) {
	t.Run("before grace period", func(t *testing.T) {
		//          ┌──grace period──┐
		//          │                │
		// ───▲─────|──────|─────────|----> time
		//    │     └jitter|         └round start
		//   now
		wait := timeToWaitToBuildNipostChallenge(2*time.Hour, time.Hour)
		// The jitter may legitimately be 0, so the lower bound is inclusive.
		require.GreaterOrEqual(t, wait, time.Hour)
		require.LessOrEqual(t, wait, time.Hour+time.Second*36)
	})
	t.Run("after grace period, within max jitter value", func(t *testing.T) {
		//          ┌──grace period──┐
		//          │                │
		// ─────────|──▲────|────────|----> time
		//          └ji│tter|        └round start
		//            now
		wait := timeToWaitToBuildNipostChallenge(time.Hour-time.Second*10, time.Hour)
		require.GreaterOrEqual(t, wait, -time.Second*10)
		// jitter is 1% = 36s for 1h grace period
		require.LessOrEqual(t, wait, time.Second*(36-10))
	})
	t.Run("after jitter max value", func(t *testing.T) {
		//          ┌──grace period──┐
		//          │                │
		// ─────────|──────|──▲──────|----> time
		//          └jitter|  │      └round start
		//                   now
		wait := timeToWaitToBuildNipostChallenge(time.Hour-time.Second*37, time.Hour)
		require.Less(t, wait, time.Duration(0))
	})
}
37 changes: 33 additions & 4 deletions activation/nipost.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/hex"
"errors"
"fmt"
"math/rand"
"time"

"github.com/spacemeshos/merkle-tree"
Expand All @@ -22,6 +23,24 @@ import (
"github.com/spacemeshos/go-spacemesh/signing"
)

const (
// Jitter values to avoid all nodes querying the poet at the same time.
// Note: the jitter values are represented as a percentage of cycle gap.
// mainnet cycle-gap: 12h
// systest cycle-gap: 30s
// NOTE(review): the systest figures below (0.36s / 0.72s) correspond to a 30m
// cycle gap; 0.02% / 0.04% of 30s would be 6ms / 12ms. Confirm which is right.

// Minimum jitter value before querying for the proof.
// Gives the poet service time to generate proof after a round ends (~8s on mainnet).
// mainnet -> 8.64s
// systest -> 0.36s
minPoetGetProofJitter = 0.02

// The maximum jitter value before querying for the proof.
// mainnet -> 17.28s
// systest -> 0.72s
maxPoetGetProofJitter = 0.04
)

//go:generate mockgen -package=activation -destination=./nipost_mocks.go -source=./nipost.go PoetProvingServiceClient

// PoetProvingServiceClient provides a gateway to a trust-less public proving service, which may serve many PoET
Expand Down Expand Up @@ -385,10 +404,7 @@ func (nb *NIPostBuilder) getBestProof(ctx context.Context, challenge types.Hash3
continue
}
round := r.PoetRound.ID
// Time to wait before querying for the proof
// The additional second is an optimization to be nicer to poet
// and don't accidentally ask it to soon and have to retry.
waitTime := time.Until(r.PoetRound.End.IntoTime()) + time.Second
waitTime := calcGetProofWaitTime(time.Until(r.PoetRound.End.IntoTime()), nb.poetCfg.CycleGap)
eg.Go(func() error {
logger.With().Info("waiting till poet round end", log.Duration("wait time", waitTime))
select {
Expand Down Expand Up @@ -479,3 +495,16 @@ func constructMerkleProof(challenge types.Hash32, members []types.Member) (*type
Nodes: nodesH32,
}, nil
}

// randomDurationInRange returns a pseudo-random duration from the inclusive
// range [min, max].
//
// When max <= min there is nothing to randomize and min is returned. This also
// prevents rand.Int63n from panicking on a non-positive argument, which would
// otherwise happen for an inverted range (e.g. one derived from a negative
// configured duration).
func randomDurationInRange(min, max time.Duration) time.Duration {
	if max <= min {
		return min
	}
	// Int63n(n) yields values in [0, n); the +1 makes max itself reachable.
	return min + time.Duration(rand.Int63n(int64(max-min+1)))
}

// calcGetProofWaitTime returns how long to wait before querying the poet for
// the proof. On top of tillRoundEnd it adds a random jitter, taken from between
// minPoetGetProofJitter and maxPoetGetProofJitter percent of cycleGap, so that
// nodes do not all query for the proof at the same time.
func calcGetProofWaitTime(tillRoundEnd, cycleGap time.Duration) (waitTime time.Duration) {
	gap := float64(cycleGap)
	lowest := time.Duration(gap * minPoetGetProofJitter / 100.0)
	highest := time.Duration(gap * maxPoetGetProofJitter / 100.0)
	waitTime = tillRoundEnd + randomDurationInRange(lowest, highest)
	return waitTime
}
36 changes: 36 additions & 0 deletions activation/nipost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1054,3 +1054,39 @@ func FuzzBuilderStateConsistency(f *testing.F) {
func FuzzBuilderStateSafety(f *testing.F) {
tester.FuzzSafety[types.NIPostBuilderState](f)
}

// TestRandomDurationInRange verifies that randomDurationInRange only produces
// values within the inclusive [min, max] range.
func TestRandomDurationInRange(t *testing.T) {
	t.Parallel()
	// The helper receives the calling subtest's own *testing.T. The subtests run
	// in parallel on their own goroutines, and a failing require assertion calls
	// FailNow, which must run on the goroutine of the test it belongs to.
	test := func(t *testing.T, min, max time.Duration) {
		t.Helper()
		for i := 0; i < 100; i++ {
			waittime := randomDurationInRange(min, max)
			require.LessOrEqual(t, waittime, max)
			require.GreaterOrEqual(t, waittime, min)
		}
	}
	t.Run("min = 0", func(t *testing.T) {
		t.Parallel()
		test(t, 0, 7*time.Second)
	})
	t.Run("min != 0", func(t *testing.T) {
		t.Parallel()
		test(t, 5*time.Second, 7*time.Second)
	})
}

// TestCalculatingGetProofWaitTime checks the bounds of the wait time returned by
// calcGetProofWaitTime, both when the round end is already in the past and when
// it is still ahead.
func TestCalculatingGetProofWaitTime(t *testing.T) {
	t.Parallel()
	t.Run("past round end", func(t *testing.T) {
		t.Parallel()
		waitTime := calcGetProofWaitTime(-time.Hour, time.Hour*12)
		require.Less(t, waitTime, time.Duration(0))
	})
	t.Run("before round end", func(t *testing.T) {
		t.Parallel()
		cycleGap := 12 * time.Hour
		waitTime := calcGetProofWaitTime(time.Hour, cycleGap)

		// The jitter range is inclusive on both ends, so a draw exactly equal
		// to the minimum jitter is valid.
		require.GreaterOrEqual(t, waitTime, time.Hour+time.Duration(float64(cycleGap)*minPoetGetProofJitter/100))
		require.LessOrEqual(t, waitTime, time.Hour+time.Duration(float64(cycleGap)*maxPoetGetProofJitter/100))
	})
}

0 comments on commit b86540b

Please sign in to comment.