Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Give NiPoSTBuilder more time to create the PoST #4893

Closed
wants to merge 14 commits into from
14 changes: 9 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,19 @@ See [RELEASE](./RELEASE.md) for workflow instructions.

### Upgrade information

Legacy discovery protocol was removed in [4836](https://github.com/spacemeshos/go-spacemesh/pull/4836).
Config option and flag `p2p-disable-legacy-discovery` is noop, and will be completely removed in future versions.
Legacy discovery protocol was removed in [#4836](https://github.com/spacemeshos/go-spacemesh/pull/4836).
Config option and flag `p2p-disable-legacy-discovery` is a no-op and will be completely removed in future versions.

### Highlights

With [#4893](https://github.com/spacemeshos/go-spacemesh/pull/4893) nodes are given more time to publish an ATX.
Nodes still need to publish an ATX before the new PoET round starts (within 12h on mainnet) to make it into the
next PoET round, but if they miss that deadline they will now still publish an ATX to receive rewards for
the upcoming epoch and only skip the epoch after that.

### Features

* [#4845](https://github.com/spacemeshos/go-spacemesh/pull/4845) API to fetche opened connections.
* [#4845](https://github.com/spacemeshos/go-spacemesh/pull/4845) API to fetch opened connections.

> grpcurl -plaintext 127.0.0.1:9093 spacemesh.v1.AdminService.PeerInfoStream

Expand Down Expand Up @@ -47,7 +52,6 @@ Config option and flag `p2p-disable-legacy-discovery` is noop, and will be compl

Doesn't affect direct peers. In order to disable:


```json
{
"p2p": {
Expand All @@ -60,4 +64,4 @@ Doesn't affect direct peers. In order to disable:
### Improvements

* [#4882](https://github.com/spacemeshos/go-spacemesh/pull/4882) Increase cache size and parametrize datastore.
* [#4887](https://github.com/spacemeshos/go-spacemesh/pull/4887) Fixed crashes on API call.
* [#4887](https://github.com/spacemeshos/go-spacemesh/pull/4887) Fixed crashes on API call.
10 changes: 5 additions & 5 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Release

## Releasing a major version.
## Releasing a major version

Rename UNRELEASED to a concrete <version> and create a PR with commit: Release <version>.
Branch name should match the <version> set in CHANGELOG.
Rename UNRELEASED to a concrete <version> and create a PR with commit: Release <version>.
Branch name should match the <version> set in CHANGELOG.
Additionally tag that same commit with 0 as a patch version (if branch is v1.1 - tag is v1.1.0).

## Releasing a minor version.
## Releasing a minor version

### Latest

Rename UNRELEASED to a concrete <version>. If previous major was v1.1.0, new version will
be v1.1.1.
be v1.1.1.
Commit changes, create a PR, and create a tag.
Rebase released branch onto develop.

Expand Down
27 changes: 12 additions & 15 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,8 @@
if err != nil {
return nil, err
}
if nipost.TargetEpoch() < b.currentEpoch() {
if nipost.PublishEpoch < b.currentEpoch() {
b.log.With().Info("atx nipost challenge is stale - discarding it",
log.Stringer("target_epoch", nipost.TargetEpoch()),
log.Stringer("publish_epoch", nipost.PublishEpoch),
log.Stringer("current_epoch", b.currentEpoch()),
)
Expand Down Expand Up @@ -577,6 +576,8 @@

if b.pendingATX == nil {
var err error
ctx, cancel := context.WithDeadline(ctx, b.layerClock.LayerToTime((challenge.TargetEpoch()).FirstLayer()))
defer cancel()
b.pendingATX, err = b.createAtx(ctx, challenge)
if err != nil {
return fmt.Errorf("create ATX: %w", err)
Expand All @@ -602,17 +603,17 @@
select {
case <-atxReceived:
logger.With().Info("received atx in db", atx.ID())
case <-b.layerClock.AwaitLayer((atx.TargetEpoch() + 1).FirstLayer()):
if err = b.discardChallenge(); err != nil {
return fmt.Errorf("%w: target epoch has passed", err)
if err := b.discardChallenge(); err != nil {
return fmt.Errorf("%w: after published atx", err)

Check warning on line 607 in activation/activation.go

View check run for this annotation

Codecov / codecov/patch

activation/activation.go#L607

Added line #L607 was not covered by tests
}
return fmt.Errorf("%w: target epoch has passed", ErrATXChallengeExpired)
case <-b.layerClock.AwaitLayer((atx.TargetEpoch()).FirstLayer()):
if err := b.discardChallenge(); err != nil {
return fmt.Errorf("%w: publish epoch has passed", err)
}

Check warning on line 612 in activation/activation.go

View check run for this annotation

Codecov / codecov/patch

activation/activation.go#L611-L612

Added lines #L611 - L612 were not covered by tests
return fmt.Errorf("%w: publish epoch has passed", ErrATXChallengeExpired)
case <-ctx.Done():
return ctx.Err()
}
if err = b.discardChallenge(); err != nil {
return fmt.Errorf("%w: after published atx", err)
}
return nil
}

Expand All @@ -622,12 +623,8 @@

func (b *Builder) createAtx(ctx context.Context, challenge *types.NIPostChallenge) (*types.ActivationTx, error) {
pubEpoch := challenge.PublishEpoch
nextPoetRoundStart := b.poetRoundStart(pubEpoch)

// NiPoST must be ready before start of the next poet round.
buildingNipostCtx, cancel := context.WithDeadline(ctx, nextPoetRoundStart)
defer cancel()
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(buildingNipostCtx, challenge)
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(ctx, challenge)
if err != nil {
return nil, fmt.Errorf("build NIPost: %w", err)
}
Expand All @@ -645,7 +642,7 @@
}
b.log.Debug("publication epoch has arrived!")

if challenge.TargetEpoch() < b.currentEpoch() {
if challenge.PublishEpoch < b.currentEpoch() {
if err = b.discardChallenge(); err != nil {
return nil, fmt.Errorf("%w: atx publish epoch has passed during nipost construction", err)
}
Expand Down
8 changes: 4 additions & 4 deletions activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func publishAtx(
})
never := make(chan struct{})
tab.mhdlr.EXPECT().AwaitAtx(gomock.Any()).Return(ch)
tab.mclock.EXPECT().AwaitLayer((publishEpoch + 2).FirstLayer()).Return(never)
tab.mclock.EXPECT().AwaitLayer((publishEpoch + 1).FirstLayer()).Return(never)
tab.mhdlr.EXPECT().UnsubscribeAtx(gomock.Any()).Do(
func(got types.ATXID) {
require.Equal(t, built.ID(), got)
Expand Down Expand Up @@ -589,7 +589,7 @@ func TestBuilder_PublishActivationTx_FaultyNet(t *testing.T) {
require.Equal(t, &gotAtx, built)
return nil
})
expireEpoch := publishEpoch + 2
expireEpoch := publishEpoch + 1
tab.mclock.EXPECT().AwaitLayer(expireEpoch.FirstLayer()).Return(done)
tab.mhdlr.EXPECT().UnsubscribeAtx(gomock.Any()).Do(
func(got types.ATXID) {
Expand Down Expand Up @@ -1042,7 +1042,7 @@ func TestBuilder_NIPostPublishRecovery(t *testing.T) {
require.Equal(t, &gotAtx, built)
return nil
})
expireEpoch := publishEpoch + 2
expireEpoch := publishEpoch + 1
tab.mclock.EXPECT().AwaitLayer(expireEpoch.FirstLayer()).Return(done)
tab.mhdlr.EXPECT().UnsubscribeAtx(gomock.Any()).Do(
func(got types.ATXID) {
Expand Down Expand Up @@ -1251,7 +1251,7 @@ func TestWaitPositioningAtx(t *testing.T) {
closed := make(chan struct{})
close(closed)
tab.mclock.EXPECT().AwaitLayer(types.EpochID(1).FirstLayer()).Return(closed).AnyTimes()
tab.mclock.EXPECT().AwaitLayer(types.EpochID(3).FirstLayer()).Return(make(chan struct{})).AnyTimes()
tab.mclock.EXPECT().AwaitLayer(types.EpochID(2).FirstLayer()).Return(make(chan struct{})).AnyTimes()
tab.mpub.EXPECT().Publish(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
tab.mhdlr.EXPECT().AwaitAtx(gomock.Any()).Return(closed).AnyTimes()
tab.mhdlr.EXPECT().UnsubscribeAtx(gomock.Any()).AnyTimes()
Expand Down
70 changes: 38 additions & 32 deletions activation/nipost.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,35 +177,31 @@ func (nb *NIPostBuilder) UpdatePoETProvers(poetProvers []PoetProvingServiceClien
// publish a proof - a process that takes about an epoch.
func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPostChallenge) (*types.NIPost, time.Duration, error) {
logger := nb.log.WithContext(ctx)
// Calculate deadline for waiting for poet proofs.
// Deadline must fit between:
// - the end of the current poet round
// - the start of the next one.
// It must also accommodate for PoST duration.
//
// Note: to avoid missing next PoET round, we need to publish the ATX before the next PoET round starts.
// We can still publish an ATX late (i.e. within publish epoch) and receive rewards, but we will miss one
// epoch because we didn't submit the challenge to PoET in time for next round.
// PoST
// ┌─────────────────────┐ ┌┐┌─────────────────────┐
// │ POET ROUND │ │││ NEXT POET ROUND │
// ┌────▲──┴──────────────────┬──┴─▲┴┴┴─────────────────▲┬──┴───► time
// │ │ EPOCH │ EPOCH ││
// └────┼─────────────────────┴────┼────────────────────┼┴──────
// │
// WE ARE HERE DEADLINE FOR ATX PUBLICATION
// WAITING FOR POET DEADLINE
// PROOFS

pubEpoch := challenge.PublishEpoch
poetRoundStart := nb.layerClock.LayerToTime((pubEpoch - 1).FirstLayer()).Add(nb.poetCfg.PhaseShift)
nextPoetRoundStart := nb.layerClock.LayerToTime(pubEpoch.FirstLayer()).Add(nb.poetCfg.PhaseShift)
poetRoundEnd := nextPoetRoundStart.Add(-nb.poetCfg.CycleGap)
poetProofDeadline := poetRoundEnd.Add(nb.poetCfg.GracePeriod)
// ┌────▲──┴──────────────────┬──▲──┴┴┴─────────────────▲┬──┴─────────────► time
// │ │ EPOCH │ │ PUBLISH EPOCH ││ TARGET EPOCH
// └────┼─────────────────────┴──┼──────────────────────┼┴────────────────
// │
// WE ARE HERE PROOF BECOMES ATX PUBLICATION
// AVAILABLE DEADLINE

publishEpoch := challenge.PublishEpoch
poetRoundStart := nb.layerClock.LayerToTime((publishEpoch - 1).FirstLayer()).Add(nb.poetCfg.PhaseShift)
poetRoundEnd := nb.layerClock.LayerToTime(publishEpoch.FirstLayer()).Add(nb.poetCfg.PhaseShift).Add(-nb.poetCfg.CycleGap)

// we want to publish before the publish epoch ends or we won't receive rewards
publishEpochEnd := nb.layerClock.LayerToTime((publishEpoch + 1).FirstLayer())

logger.With().Info("building nipost",
log.Time("poet round start", poetRoundStart),
log.Time("poet round end", poetRoundEnd),
log.Time("next poet round start", nextPoetRoundStart),
log.Time("poet proof deadline", poetProofDeadline),
log.Stringer("publish epoch", pubEpoch),
log.Stringer("publish epoch", publishEpoch),
log.Time("publish epoch end", publishEpochEnd),
log.Stringer("target epoch", challenge.TargetEpoch()),
)

Expand All @@ -217,8 +213,9 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
}

// Phase 0: Submit challenge to PoET services.
now := time.Now()
if len(nb.state.PoetRequests) == 0 {
now := time.Now()
// Deadline: start of PoET round for publish epoch. PoET won't accept registrations after that.
if poetRoundStart.Before(now) {
return nil, 0, fmt.Errorf("%w: poet round has already started at %s (now: %s)", ErrATXChallengeExpired, poetRoundStart, now)
}
Expand All @@ -229,23 +226,23 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
defer cancel()
poetRequests := nb.submitPoetChallenges(submitCtx, prefix, challengeHash.Bytes(), signature, nb.signer.NodeID())
if len(poetRequests) == 0 {
return nil, 0, &PoetSvcUnstableError{msg: "failed to submit challenge to any PoET", source: ctx.Err()}
return nil, 0, &PoetSvcUnstableError{msg: "failed to submit challenge to any PoET", source: submitCtx.Err()}
}

nb.state.Challenge = challengeHash
nb.state.PoetRequests = poetRequests
nb.persistState()
if err := ctx.Err(); err != nil {
return nil, 0, fmt.Errorf("submitting challenges: %w", err)
}
}

// Phase 1: query PoET services for proofs
if nb.state.PoetProofRef == types.EmptyPoetProofRef {
if poetProofDeadline.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to query poet proof for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, poetProofDeadline, now)
now := time.Now()
// Deadline: the end of the publish epoch. If we do not publish within
// the publish epoch we won't receive any rewards in the target epoch.
if publishEpochEnd.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to query poet proof for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, publishEpochEnd, now)
}
getProofsCtx, cancel := context.WithDeadline(ctx, poetProofDeadline)
getProofsCtx, cancel := context.WithDeadline(ctx, publishEpochEnd)
defer cancel()

events.EmitPoetWaitProof(challenge.PublishEpoch, challenge.TargetEpoch(), time.Until(poetRoundEnd))
Expand All @@ -264,11 +261,20 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
// Phase 2: Post execution.
var postGenDuration time.Duration = 0
if nb.state.NIPost.Post == nil {
now := time.Now()
// Deadline: the end of the publish epoch. If we do not publish within
// the publish epoch we won't receive any rewards in the target epoch.
if publishEpochEnd.Before(now) {
return nil, 0, fmt.Errorf("%w: deadline to publish ATX for pub epoch %d exceeded (deadline: %s, now: %s)", ErrATXChallengeExpired, challenge.PublishEpoch, publishEpochEnd, now)
}
postCtx, cancel := context.WithDeadline(ctx, publishEpochEnd)
defer cancel()

nb.log.With().Info("starting post execution", log.Binary("challenge", nb.state.PoetProofRef[:]))
startTime := time.Now()
events.EmitPostStart(nb.state.PoetProofRef[:])

proof, proofMetadata, err := nb.postSetupProvider.GenerateProof(ctx, nb.state.PoetProofRef[:], proving.WithPowCreator(nb.nodeID.Bytes()))
proof, proofMetadata, err := nb.postSetupProvider.GenerateProof(postCtx, nb.state.PoetProofRef[:], proving.WithPowCreator(nb.nodeID.Bytes()))
if err != nil {
events.EmitPostFailure()
return nil, 0, fmt.Errorf("failed to generate Post: %w", err)
Expand All @@ -278,7 +284,7 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
return nil, 0, fmt.Errorf("failed to get commitment ATX: %w", err)
}
if err := nb.validator.Post(
ctx,
postCtx,
challenge.PublishEpoch,
nb.nodeID,
commitmentAtxId,
Expand Down