Skip to content

Commit

Permalink
Verify self-generated POST proofs to catch errors early (#4721)
Browse files Browse the repository at this point in the history
## Motivation
Currently, if a node generates an invalid POST proof (possibly because its POST data is invalid), it only learns about it after the Poet round has ended (2 weeks later), upon publishing the ATX. This is suboptimal: we can check the proof right after it is created, giving the operator more time to react.

## Changes
Verify the generated POST proofs immediately after they are created.

## Test Plan
- UT
- system test
- [x] manual test joining the mainnet
  • Loading branch information
poszu committed Jul 18, 2023
1 parent c40a4ab commit 618aae2
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 33 deletions.
41 changes: 36 additions & 5 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ type Builder struct {
nipostBuilder nipostBuilder
postSetupProvider postSetupProvider
initialPost *types.Post
validator nipostValidator

// smeshingMutex protects `StartSmeshing` and `StopSmeshing` from concurrent access
smeshingMutex sync.Mutex
Expand Down Expand Up @@ -125,6 +126,12 @@ func WithPoetConfig(c PoetConfig) BuilderOption {
}
}

// WithValidator returns a BuilderOption that stores v in the Builder's
// validator field.
func WithValidator(v nipostValidator) BuilderOption {
	setValidator := func(builder *Builder) {
		builder.validator = v
	}
	return setValidator
}

// NewBuilder returns an atx builder that will start a routine that will attempt to create an atx upon each new layer.
func NewBuilder(
conf Config,
Expand Down Expand Up @@ -211,6 +218,7 @@ func (b *Builder) StartSmeshing(coinbase types.Address, opts PostSetupOpts) erro
return nil
case err != nil:
b.log.Panic("initialization failed: %v", err)
return err
}

b.run(ctx)
Expand Down Expand Up @@ -254,7 +262,8 @@ func (b *Builder) SmesherID() types.NodeID {
}

func (b *Builder) run(ctx context.Context) {
if err := b.generateInitialPost(ctx); err != nil {
err := b.generateInitialPost(ctx)
if err != nil {
b.log.Error("Failed to generate proof: %s", err)
return
}
Expand All @@ -274,28 +283,50 @@ func (b *Builder) generateInitialPost(ctx context.Context) error {
}
// ...and if we don't have an initial POST persisted already.
if post, err := loadPost(b.nipostBuilder.DataDir()); err == nil {
b.initialPost = post
return nil
b.log.Info("loaded the initial post from disk")
return b.verifyInitialPost(ctx, post, &types.PostMetadata{
Challenge: shared.ZeroChallenge,
LabelsPerUnit: b.postSetupProvider.Config().LabelsPerUnit,
})
}

// Create the initial post and save it.
startTime := time.Now()
var err error
events.EmitPostStart(shared.ZeroChallenge)
b.initialPost, _, err = b.postSetupProvider.GenerateProof(ctx, shared.ZeroChallenge, proving.WithPowCreator(b.nodeID.Bytes()))
post, metadata, err := b.postSetupProvider.GenerateProof(ctx, shared.ZeroChallenge, proving.WithPowCreator(b.nodeID.Bytes()))
if err != nil {
events.EmitPostFailure()
return fmt.Errorf("post execution: %w", err)
}
events.EmitPostComplete(shared.ZeroChallenge)
metrics.PostDuration.Set(float64(time.Since(startTime).Nanoseconds()))
b.log.Info("created the initial post")
// Return the verification error itself. The outer `err` comes from
// GenerateProof and is nil at this point, so returning it would report a
// failed verification as success and let the unverified post be saved.
if err := b.verifyInitialPost(ctx, post, metadata); err != nil {
	return err
}

if err := savePost(b.nipostBuilder.DataDir(), b.initialPost); err != nil {
if err := savePost(b.nipostBuilder.DataDir(), post); err != nil {
b.log.With().Warning("failed to save initial post: %w", log.Err(err))
}
return nil
}

// verifyInitialPost checks the given initial POST proof against its metadata
// using b.validator and, on success, stores the proof in b.initialPost.
//
// The commitment ATX ID is read from the post setup provider; a failure to
// read it is logged at Panic level. The proof is validated for epoch 0 with
// the number of units taken from the provider's last options.
//
// If validation fails while ctx is already done, the error is returned wrapped
// and the proof is NOT reported as invalid: an interrupted verification says
// nothing about the state of the POST data. Any other validation error emits
// an invalid-proof event and is logged at Fatal level.
func (b *Builder) verifyInitialPost(ctx context.Context, post *types.Post, metadata *types.PostMetadata) error {
	b.log.With().Info("verifying the initial post", log.Object("post", post), log.Object("metadata", metadata))
	commitmentATXID, err := b.postSetupProvider.CommitmentAtx()
	if err != nil {
		b.log.With().Panic("failed to fetch commitment ATX ID.", log.Err(err))
	}
	numUnits := b.postSetupProvider.LastOpts().NumUnits
	if err := b.validator.Post(ctx, types.EpochID(0), b.nodeID, commitmentATXID, post, metadata, numUnits); err != nil {
		if ctx.Err() != nil {
			// The context was cancelled or timed out; do not blame the POST data.
			return fmt.Errorf("verifying initial post: %w", err)
		}
		events.EmitInvalidPostProof()
		b.log.With().Fatal("initial POST proof is invalid. Probably the initialized POST data is corrupted. Please verify the data with postcli and regenerate the corrupted files.", log.Err(err))
		return err
	}
	b.initialPost = post
	return nil
}

// receivePendingPoetClients swaps nil into b.pendingPoetClients and returns
// the value that was stored there before the swap.
func (b *Builder) receivePendingPoetClients() *[]PoetProvingServiceClient {
	pending := b.pendingPoetClients.Swap(nil)
	return pending
}
Expand Down
41 changes: 33 additions & 8 deletions activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,13 @@ type testAtxBuilder struct {
coinbase types.Address
goldenATXID types.ATXID

mhdlr *MockatxHandler
mpub *mocks.MockPublisher
mnipost *MocknipostBuilder
mpost *MockpostSetupProvider
mclock *MocklayerClock
msync *Mocksyncer
mhdlr *MockatxHandler
mpub *mocks.MockPublisher
mnipost *MocknipostBuilder
mpost *MockpostSetupProvider
mclock *MocklayerClock
msync *Mocksyncer
mValidator *MocknipostValidator
}

func newTestBuilder(tb testing.TB, opts ...BuilderOption) *testAtxBuilder {
Expand All @@ -121,8 +122,11 @@ func newTestBuilder(tb testing.TB, opts ...BuilderOption) *testAtxBuilder {
mpost: NewMockpostSetupProvider(ctrl),
mclock: NewMocklayerClock(ctrl),
msync: NewMocksyncer(ctrl),
mValidator: NewMocknipostValidator(ctrl),
}

opts = append(opts, WithValidator(tab.mValidator))

cfg := Config{
CoinbaseAccount: tab.coinbase,
GoldenATXID: tab.goldenATXID,
Expand Down Expand Up @@ -254,7 +258,10 @@ func TestBuilder_StartSmeshingCoinbase(t *testing.T) {

tab.mpost.EXPECT().PrepareInitializer(gomock.Any(), gomock.Any()).AnyTimes()
tab.mpost.EXPECT().StartSession(gomock.Any()).AnyTimes()
tab.mpost.EXPECT().LastOpts().Return(&PostSetupOpts{}).AnyTimes()
tab.mpost.EXPECT().CommitmentAtx().Return(tab.goldenATXID, nil).AnyTimes()
tab.mpost.EXPECT().GenerateProof(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(&types.Post{}, &types.PostMetadata{}, nil)
tab.mValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil)
tab.mclock.EXPECT().AwaitLayer(gomock.Any()).Return(make(chan struct{})).AnyTimes()
require.NoError(t, tab.StartSmeshing(coinbase, postSetupOpts))
require.Equal(t, coinbase, tab.Coinbase())
Expand All @@ -271,9 +278,15 @@ func TestBuilder_RestartSmeshing(t *testing.T) {
getBuilder := func(t *testing.T) *Builder {
tab := newTestBuilder(t)
tab.mpost.EXPECT().PrepareInitializer(gomock.Any(), gomock.Any()).AnyTimes()
tab.mpost.EXPECT().CommitmentAtx().Return(types.EmptyATXID, nil).AnyTimes()
tab.mpost.EXPECT().LastOpts().Return(&PostSetupOpts{}).AnyTimes()
tab.mpost.EXPECT().StartSession(gomock.Any()).AnyTimes()
tab.mpost.EXPECT().GenerateProof(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(&types.Post{}, &types.PostMetadata{}, nil)
tab.mpost.EXPECT().GenerateProof(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(&types.Post{}, &types.PostMetadata{
Challenge: shared.ZeroChallenge,
}, nil)
tab.mpost.EXPECT().Reset().AnyTimes()
tab.mValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil)
tab.mpost.EXPECT().Config().AnyTimes()
ch := make(chan struct{})
close(ch)
tab.mclock.EXPECT().AwaitLayer(gomock.Any()).Return(ch).AnyTimes()
Expand Down Expand Up @@ -386,7 +399,10 @@ func TestBuilder_StopSmeshing_OnPoSTError(t *testing.T) {
tab := newTestBuilder(t)
tab.mpost.EXPECT().PrepareInitializer(gomock.Any(), gomock.Any()).AnyTimes()
tab.mpost.EXPECT().StartSession(gomock.Any()).Return(nil).AnyTimes()
tab.mpost.EXPECT().CommitmentAtx().Return(types.EmptyATXID, nil).AnyTimes()
tab.mpost.EXPECT().LastOpts().Return(&PostSetupOpts{}).AnyTimes()
tab.mpost.EXPECT().GenerateProof(gomock.Any(), gomock.Any(), gomock.Any()).Return(&types.Post{}, &types.PostMetadata{}, nil).AnyTimes()
tab.mValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil)
ch := make(chan struct{})
close(ch)
now := time.Now()
Expand Down Expand Up @@ -1084,6 +1100,9 @@ func TestBuilder_RetryPublishActivationTx(t *testing.T) {
func TestBuilder_InitialProofGeneratedOnce(t *testing.T) {
tab := newTestBuilder(t, WithPoetConfig(PoetConfig{PhaseShift: layerDuration * 4}))
tab.mpost.EXPECT().GenerateProof(gomock.Any(), shared.ZeroChallenge, gomock.Any()).Return(&types.Post{}, &types.PostMetadata{}, nil)
tab.mpost.EXPECT().LastOpts().Return(&PostSetupOpts{})
tab.mpost.EXPECT().CommitmentAtx().Return(tab.goldenATXID, nil)
tab.mValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil)
require.NoError(t, tab.generateInitialPost(context.Background()))

posEpoch := postGenesisEpoch + 1
Expand All @@ -1109,7 +1128,13 @@ func TestBuilder_InitialProofGeneratedOnce(t *testing.T) {

func TestBuilder_InitialPostIsPersisted(t *testing.T) {
tab := newTestBuilder(t, WithPoetConfig(PoetConfig{PhaseShift: layerDuration * 4}))
tab.mpost.EXPECT().GenerateProof(gomock.Any(), shared.ZeroChallenge, gomock.Any()).Return(&types.Post{}, &types.PostMetadata{}, nil)
tab.mpost.EXPECT().Config().AnyTimes().Return(PostConfig{})
tab.mpost.EXPECT().LastOpts().Return(&PostSetupOpts{}).Times(3)
tab.mpost.EXPECT().CommitmentAtx().Return(tab.goldenATXID, nil).Times(3)
tab.mpost.EXPECT().GenerateProof(gomock.Any(), shared.ZeroChallenge, gomock.Any()).Return(&types.Post{}, &types.PostMetadata{
Challenge: shared.ZeroChallenge,
}, nil)
tab.mValidator.EXPECT().Post(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil)
require.NoError(t, tab.generateInitialPost(context.Background()))

// GenerateProof() should not be called again
Expand Down
35 changes: 34 additions & 1 deletion activation/nipost.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/spacemeshos/merkle-tree"
"github.com/spacemeshos/poet/shared"
"github.com/spacemeshos/post/proving"
"github.com/spacemeshos/post/verifying"
"golang.org/x/sync/errgroup"

"github.com/spacemeshos/go-spacemesh/activation/metrics"
Expand Down Expand Up @@ -69,6 +70,15 @@ type NIPostBuilder struct {
signer *signing.EdSigner
layerClock layerClock
poetCfg PoetConfig
validator nipostValidator
}

// NIPostBuilderOption is a function that modifies a NIPostBuilder. Options
// passed to NewNIPostBuilder are applied to the newly created builder.
type NIPostBuilderOption func(*NIPostBuilder)

// WithNipostValidator returns a NIPostBuilderOption that stores v in the
// NIPostBuilder's validator field.
func WithNipostValidator(v nipostValidator) NIPostBuilderOption {
	setValidator := func(builder *NIPostBuilder) {
		builder.validator = v
	}
	return setValidator
}

type poetDbAPI interface {
Expand All @@ -87,8 +97,9 @@ func NewNIPostBuilder(
signer *signing.EdSigner,
poetCfg PoetConfig,
layerClock layerClock,
opts ...NIPostBuilderOption,
) *NIPostBuilder {
return &NIPostBuilder{
b := &NIPostBuilder{
nodeID: nodeID,
postSetupProvider: postSetupProvider,
poetProvers: poetProvers,
Expand All @@ -100,6 +111,11 @@ func NewNIPostBuilder(
poetCfg: poetCfg,
layerClock: layerClock,
}

for _, opt := range opts {
opt(b)
}
return b
}

func (nb *NIPostBuilder) DataDir() string {
Expand Down Expand Up @@ -217,6 +233,23 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
events.EmitPostFailure()
return nil, 0, fmt.Errorf("failed to generate Post: %v", err)
}
commitmentAtxId, err := nb.postSetupProvider.CommitmentAtx()
if err != nil {
return nil, 0, fmt.Errorf("failed to get commitment ATX: %v", err)
}
if err := nb.validator.Post(
ctx,
challenge.PublishEpoch,
nb.nodeID,
commitmentAtxId,
proof,
proofMetadata,
nb.postSetupProvider.LastOpts().NumUnits,
verifying.WithLabelScryptParams(nb.postSetupProvider.LastOpts().Scrypt),
); err != nil {
events.EmitInvalidPostProof()
return nil, 0, fmt.Errorf("failed to verify Post: %v", err)
}
events.EmitPostComplete(nb.state.PoetProofRef[:])
postGenDuration = time.Since(startTime)
nb.log.With().Info("finished post execution", log.Duration("duration", postGenDuration))
Expand Down

0 comments on commit 618aae2

Please sign in to comment.