Skip to content

Commit

Permalink
Give NiPoSTBuilder more time to create the PoST (#4893)
Browse files Browse the repository at this point in the history
## Motivation
Closes #4797

## Changes
Phase 2 of the BuildNiPoST step in publishing an ATX is restricted with a deadline of `nextPoetRoundStart`. This deadline doesn't need to be this strict. This PR adapts deadlines to the following:

- Phase 0 (submitting challenge to PoET): timeout `poetRoundStart` - unchanged
- Phase 1 (fetching proof from PoET): before the deadline was the end of the round round it submitted to + a grace period of 1 hour; this effectively means there is only a 1 hour window to fetch the proof from PoET before the node gives up. Now instead it will fetch with a deadline until the end of the next PoET round. This means that if the node doesn't manage to publish its ATX before next PoET round starts, it won't be able to publish one next epoch (unchanged) but will still try to publish one for the current epoch, even if its late. It will still miss at most 1 epoch (when its late).
- Phase 2 (publishing the ATX): before timeout was `nextPoetRoundStart`, with the intention to publish the ATX and have enough time left to register for the next PoET round. This is a very short window (just the time between the end of one PoET round and the start of the next one). If the node doesn't manage to generate a PoST proof within that time it gives up on publishing an ATX and misses out on rewards it could get by instead giving up on registering on the next PoET round.

This gives the last phase ~ 2 weeks of time instead of just a few hours, but if the node takes longer than `nextPoetRoundStart` it will not be able to submit a challenge for the next PoET round in time. This is because the new challenge contains the ID of the ATX that is being built right now. In that case the node will skip publishing an ATX for one epoch.

## Test Plan
existing tests pass or have been adapted

## TODO
<!-- This section should be removed when all items are complete -->
- [x] Explain motivation or link existing issue(s)
- [x] Test changes and document test plan
- [x] Update documentation as needed

## DevOps Notes
<!-- Please uncheck these items as applicable to make DevOps aware of changes that may affect releases -->
- [x] This PR does not require configuration changes (e.g., environment variables, GitHub secrets, VM resources)
- [x] This PR does not affect public APIs
- [x] This PR does not rely on a new version of external services (PoET, elasticsearch, etc.)
- [x] This PR does not make changes to log messages (which monitoring infrastructure may rely on)
  • Loading branch information
fasmat committed Aug 23, 2023
1 parent 4bd907c commit 7ee7126
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 99 deletions.
8 changes: 3 additions & 5 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,8 @@ func (b *Builder) PublishActivationTx(ctx context.Context) error {

if b.pendingATX == nil {
var err error
ctx, cancel := context.WithDeadline(ctx, b.layerClock.LayerToTime((challenge.TargetEpoch() + 1).FirstLayer()))
defer cancel()
b.pendingATX, err = b.createAtx(ctx, challenge)
if err != nil {
return fmt.Errorf("create ATX: %w", err)
Expand Down Expand Up @@ -622,12 +624,8 @@ func (b *Builder) poetRoundStart(epoch types.EpochID) time.Time {

func (b *Builder) createAtx(ctx context.Context, challenge *types.NIPostChallenge) (*types.ActivationTx, error) {
pubEpoch := challenge.PublishEpoch
nextPoetRoundStart := b.poetRoundStart(pubEpoch)

// NiPoST must be ready before start of the next poet round.
buildingNipostCtx, cancel := context.WithDeadline(ctx, nextPoetRoundStart)
defer cancel()
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(buildingNipostCtx, challenge)
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(ctx, challenge)
if err != nil {
return nil, fmt.Errorf("build NIPost: %w", err)
}
Expand Down
44 changes: 27 additions & 17 deletions activation/nipost.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,18 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
// WAITING FOR POET DEADLINE
// PROOFS

pubEpoch := challenge.PublishEpoch
poetRoundStart := nb.layerClock.LayerToTime((pubEpoch - 1).FirstLayer()).Add(nb.poetCfg.PhaseShift)
nextPoetRoundStart := nb.layerClock.LayerToTime(pubEpoch.FirstLayer()).Add(nb.poetCfg.PhaseShift)
poetRoundEnd := nextPoetRoundStart.Add(-nb.poetCfg.CycleGap)
poetProofDeadline := poetRoundEnd.Add(nb.poetCfg.GracePeriod)
publishEpoch := challenge.PublishEpoch
poetRoundStart := nb.layerClock.LayerToTime((publishEpoch - 1).FirstLayer()).Add(nb.poetCfg.PhaseShift)
poetRoundEnd := nb.layerClock.LayerToTime(publishEpoch.FirstLayer()).Add(nb.poetCfg.PhaseShift).Add(-nb.poetCfg.CycleGap)

nextPoetRoundStart := nb.layerClock.LayerToTime(publishEpoch.FirstLayer()).Add(nb.poetCfg.PhaseShift)
nextPoetRoundEnd := nb.layerClock.LayerToTime((publishEpoch + 1).FirstLayer()).Add(nb.poetCfg.PhaseShift).Add(-nb.poetCfg.CycleGap)

logger.With().Info("building nipost",
log.Time("poet round start", poetRoundStart),
log.Time("poet round end", poetRoundEnd),
log.Time("next poet round start", nextPoetRoundStart),
log.Time("poet proof deadline", poetProofDeadline),
log.Stringer("publish epoch", pubEpoch),
log.Stringer("publish epoch", publishEpoch),
log.Stringer("target epoch", challenge.TargetEpoch()),
)

Expand All @@ -217,8 +217,9 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
}

// Phase 0: Submit challenge to PoET services.
now := time.Now()
if len(nb.state.PoetRequests) == 0 {
now := time.Now()
// Deadline: start of PoET round for publish epoch. PoET won't accept registrations after that.
if poetRoundStart.Before(now) {
return nil, 0, fmt.Errorf("%w: poet round has already started at %s (now: %s)", ErrATXChallengeExpired, poetRoundStart, now)
}
Expand All @@ -229,23 +230,23 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
defer cancel()
poetRequests := nb.submitPoetChallenges(submitCtx, prefix, challengeHash.Bytes(), signature, nb.signer.NodeID())
if len(poetRequests) == 0 {
return nil, 0, &PoetSvcUnstableError{msg: "failed to submit challenge to any PoET", source: ctx.Err()}
return nil, 0, &PoetSvcUnstableError{msg: "failed to submit challenge to any PoET", source: submitCtx.Err()}
}

nb.state.Challenge = challengeHash
nb.state.PoetRequests = poetRequests
nb.persistState()
if err := ctx.Err(); err != nil {
return nil, 0, fmt.Errorf("submitting challenges: %w", err)
}
}

// Phase 1: query PoET services for proofs
if nb.state.PoetProofRef == types.EmptyPoetProofRef {
if poetProofDeadline.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to query poet proof for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, poetProofDeadline, now)
now := time.Now()
// Deadline: end of next PoET round. We can still publish an ATX late (i.e. within target epoch) and receive rewards,
// but we don't want to miss more than one PoET round.
if nextPoetRoundEnd.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to query poet proof for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, nextPoetRoundEnd, now)
}
getProofsCtx, cancel := context.WithDeadline(ctx, poetProofDeadline)
getProofsCtx, cancel := context.WithDeadline(ctx, nextPoetRoundEnd)
defer cancel()

events.EmitPoetWaitProof(challenge.PublishEpoch, challenge.TargetEpoch(), time.Until(poetRoundEnd))
Expand All @@ -264,11 +265,20 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
// Phase 2: Post execution.
var postGenDuration time.Duration = 0
if nb.state.NIPost.Post == nil {
now := time.Now()
// Deadline: end of next PoET round. We can still publish an ATX late (i.e. within target epoch) and receive rewards,
// but we don't want to miss more than one PoET round.
if nextPoetRoundEnd.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to publish ATX for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, nextPoetRoundEnd, now)
}
postCtx, cancel := context.WithDeadline(ctx, nextPoetRoundEnd)
defer cancel()

nb.log.With().Info("starting post execution", log.Binary("challenge", nb.state.PoetProofRef[:]))
startTime := time.Now()
events.EmitPostStart(nb.state.PoetProofRef[:])

proof, proofMetadata, err := nb.postSetupProvider.GenerateProof(ctx, nb.state.PoetProofRef[:], proving.WithPowCreator(nb.nodeID.Bytes()))
proof, proofMetadata, err := nb.postSetupProvider.GenerateProof(postCtx, nb.state.PoetProofRef[:], proving.WithPowCreator(nb.nodeID.Bytes()))
if err != nil {
events.EmitPostFailure()
return nil, 0, fmt.Errorf("failed to generate Post: %w", err)
Expand All @@ -278,7 +288,7 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
return nil, 0, fmt.Errorf("failed to get commitment ATX: %w", err)
}
if err := nb.validator.Post(
ctx,
postCtx,
challenge.PublishEpoch,
nb.nodeID,
commitmentAtxId,
Expand Down
121 changes: 44 additions & 77 deletions activation/nipost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,81 +493,6 @@ func TestNIPostBuilder_ManyPoETs_SubmittingChallenge_DeadlineReached(t *testing.
req.EqualValues(ref[:], nipost.PostMetadata.Challenge)
}

func TestNIPostBuilder_ManyPoETs_WaitingForProof_DeadlineReached(t *testing.T) {
t.Parallel()
// Arrange
req := require.New(t)
challenge := types.NIPostChallenge{
PublishEpoch: postGenesisEpoch + 1,
}

proof := &types.PoetProofMessage{PoetProof: types.PoetProof{}}
ctrl := gomock.NewController(t)
nipostValidator := NewMocknipostValidator(ctrl)
poetDb := NewMockpoetDbAPI(ctrl)
poetDb.EXPECT().ValidateAndStore(gomock.Any(), gomock.Any()).Return(nil)
mclock := defaultLayerClockMock(t)

poets := make([]PoetProvingServiceClient, 0, 2)
{
poet := defaultPoetServiceMock(t, []byte("poet0"))
poet.EXPECT().Proof(gomock.Any(), gomock.Any()).Return(proof, []types.Member{types.Member(challenge.Hash())}, nil)
poets = append(poets, poet)
}
{
poet := defaultPoetServiceMock(t, []byte("poet1"))
poet.EXPECT().Proof(gomock.Any(), gomock.Any()).DoAndReturn(
func(ctx context.Context, _ string) (*types.PoetProofMessage, []types.Member, error) {
// Hang up after the context expired
<-ctx.Done()
return nil, nil, ctx.Err()
},
)
poets = append(poets, poet)
}

sig, err := signing.NewEdSigner()
req.NoError(err)
poetCfg := PoetConfig{
PhaseShift: layerDuration * layersPerEpoch / 2,
GracePeriod: layerDuration * layersPerEpoch / 2,
}
postProvider := NewMockpostSetupProvider(ctrl)
postProvider.EXPECT().Status().Return(&PostSetupStatus{State: PostSetupStateComplete})
postProvider.EXPECT().CommitmentAtx().Return(types.EmptyATXID, nil).AnyTimes()
postProvider.EXPECT().LastOpts().Return(&PostSetupOpts{}).AnyTimes()
postProvider.EXPECT().GenerateProof(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn(
func(_ context.Context, challenge []byte, _ proving.OptionFunc) (*types.Post, *types.PostMetadata, error) {
return &types.Post{}, &types.PostMetadata{
Challenge: challenge,
}, nil
},
)
nipostValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(1).Return(nil)
nb, err := NewNIPostBuilder(
types.NodeID{1},
postProvider,
poetDb,
[]string{},
t.TempDir(),
logtest.New(t),
sig,
poetCfg,
mclock,
WithNipostValidator(nipostValidator),
withPoetClients(poets),
)
req.NoError(err)

// Act
nipost, _, err := nb.BuildNIPost(context.Background(), &challenge)
req.NoError(err)

// Verify
ref, _ := proof.Ref()
req.EqualValues(ref[:], nipost.PostMetadata.Challenge)
}

func TestNIPostBuilder_ManyPoETs_AllFinished(t *testing.T) {
t.Parallel()
// Arrange
Expand Down Expand Up @@ -847,13 +772,12 @@ func TestNIPoSTBuilder_StaleChallenge(t *testing.T) {

currLayer := types.EpochID(10).FirstLayer()
genesis := time.Now().Add(-time.Duration(currLayer) * layerDuration)
challenge := types.NIPostChallenge{PublishEpoch: types.EpochID(10)}

sig, err := signing.NewEdSigner()
require.NoError(t, err)

// Act & Verify
t.Run("not requests poet round started", func(t *testing.T) {
t.Run("no requests, poet round started", func(t *testing.T) {
ctrl := gomock.NewController(t)
poetDb := NewMockpoetDbAPI(ctrl)
mclock := NewMocklayerClock(ctrl)
Expand All @@ -878,6 +802,8 @@ func TestNIPoSTBuilder_StaleChallenge(t *testing.T) {
withPoetClients([]PoetProvingServiceClient{poetProver}),
)
require.NoError(t, err)

challenge := types.NIPostChallenge{PublishEpoch: currLayer.GetEpoch()}
nipost, _, err := nb.BuildNIPost(context.Background(), &challenge)
require.ErrorIs(t, err, ErrATXChallengeExpired)
require.ErrorContains(t, err, "poet round has already started")
Expand Down Expand Up @@ -909,6 +835,7 @@ func TestNIPoSTBuilder_StaleChallenge(t *testing.T) {
withPoetClients([]PoetProvingServiceClient{poetProver}),
)
require.NoError(t, err)
challenge := types.NIPostChallenge{PublishEpoch: currLayer.GetEpoch() - 1}
state := types.NIPostBuilderState{
Challenge: challenge.Hash(),
PoetRequests: []types.PoetRequest{{}},
Expand All @@ -920,6 +847,46 @@ func TestNIPoSTBuilder_StaleChallenge(t *testing.T) {
require.ErrorContains(t, err, "poet proof for pub epoch")
require.Nil(t, nipost)
})
t.Run("too late for proof generation", func(t *testing.T) {
ctrl := gomock.NewController(t)
poetDb := NewMockpoetDbAPI(ctrl)
mclock := NewMocklayerClock(ctrl)
poetProver := NewMockPoetProvingServiceClient(ctrl)
postProver := NewMockpostSetupProvider(ctrl)
postProver.EXPECT().Status().Return(&PostSetupStatus{State: PostSetupStateComplete})
mclock.EXPECT().LayerToTime(gomock.Any()).DoAndReturn(
func(got types.LayerID) time.Time {
return genesis.Add(layerDuration * time.Duration(got))
}).AnyTimes()

dir := t.TempDir()
nb, err := NewNIPostBuilder(
types.NodeID{1},
postProver,
poetDb,
[]string{},
dir,
logtest.New(t),
sig,
PoetConfig{},
mclock,
withPoetClients([]PoetProvingServiceClient{poetProver}),
)
require.NoError(t, err)
challenge := types.NIPostChallenge{PublishEpoch: currLayer.GetEpoch() - 1}
state := types.NIPostBuilderState{
Challenge: challenge.Hash(),
PoetRequests: []types.PoetRequest{{}},
PoetProofRef: [32]byte{1, 2, 3},
NIPost: &types.NIPost{},
}
require.NoError(t, saveBuilderState(dir, &state))

nipost, _, err := nb.BuildNIPost(context.Background(), &challenge)
require.ErrorIs(t, err, ErrATXChallengeExpired)
require.ErrorContains(t, err, "deadline to publish ATX for pub epoch")
require.Nil(t, nipost)
})
}

// Test if the NIPoSTBuilder continues after being interrupted just after
Expand Down

0 comments on commit 7ee7126

Please sign in to comment.