Skip to content

Commit

Permalink
Merge branch 'develop' into proper-latency
Browse files Browse the repository at this point in the history
  • Loading branch information
dshulyak committed Sep 28, 2023
2 parents 379cda1 + fe23c05 commit 9a54227
Show file tree
Hide file tree
Showing 17 changed files with 184 additions and 267 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ See [RELEASE](./RELEASE.md) for workflow instructions.

### Upgrade information

This upgrade is incompatible with versions older than v1.1.6.

At the start of the upgrade, mesh data will be pruned and compacted/vacuumed. Pruning takes longer for
nodes that joined the network earlier. For 1-week-old nodes, it takes ~20 minutes. for 3-week-old
nodes it takes ~40 minutes. The vacuum operation takes ~5 minutes and requires extra disk space
to complete successfully. If the size of state.sql is 25 GiB at the beginning of upgrade, the WAL file
(state.sql-wal) will grow to 25 GiB and then drop to 0 when the vacuum is complete. The node will resume
its normal startup routine after the pruning and vacuum is complete. The final size of state.sql is
expected to be ~1.5 GiB.

A new config `poet-request-timeout` has been added, that defines the timeout for requesting PoET proofs.
It defaults to 9 minutes so there is enough time to retry if the request fails.

Expand Down Expand Up @@ -37,6 +47,7 @@ In order to enable provide following configuration:
* [#5032](https://github.com/spacemeshos/go-spacemesh/pull/5032) Ativeset data pruned from ballots.
* [#5035](https://github.com/spacemeshos/go-spacemesh/pull/5035) Fix possible nil pointer panic when node fails to persist nipost builder state.
* [#5079](https://github.com/spacemeshos/go-spacemesh/pull/5079) increase atx cache to 50 000 to reduce disk reads.
* [#5083](https://github.com/spacemeshos/go-spacemesh/pull/5083) Disable beacon protocol temporarily.

## v1.1.5

Expand Down
48 changes: 44 additions & 4 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ const (

// Config defines configuration for Builder.
type Config struct {
CoinbaseAccount types.Address
GoldenATXID types.ATXID
LayersPerEpoch uint32
CoinbaseAccount types.Address
GoldenATXID types.ATXID
LayersPerEpoch uint32
RegossipInterval time.Duration
}

// Builder struct is the struct that orchestrates the creation of activation transactions
Expand All @@ -79,6 +80,7 @@ type Builder struct {
coinbaseAccount types.Address
goldenATXID types.ATXID
layersPerEpoch uint32
regossipInterval time.Duration
cdb *datastore.CachedDB
atxHandler atxHandler
publisher pubsub.Publisher
Expand Down Expand Up @@ -165,6 +167,7 @@ func NewBuilder(
coinbaseAccount: conf.CoinbaseAccount,
goldenATXID: conf.GoldenATXID,
layersPerEpoch: conf.LayersPerEpoch,
regossipInterval: conf.RegossipInterval,
cdb: cdb,
atxHandler: hdlr,
publisher: publisher,
Expand Down Expand Up @@ -235,7 +238,22 @@ func (b *Builder) StartSmeshing(coinbase types.Address, opts PostSetupOpts) erro
b.run(ctx)
return nil
})

if b.regossipInterval != 0 {
b.eg.Go(func() error {
ticker := time.NewTicker(b.regossipInterval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
if err := b.Regossip(ctx); err != nil {
b.log.With().Warning("failed to regossip", log.Context(ctx), log.Err(err))
}
}
}
})
}
return nil
}

Expand Down Expand Up @@ -724,6 +742,28 @@ func (b *Builder) GetPositioningAtx() (types.ATXID, error) {
return id, nil
}

func (b *Builder) Regossip(ctx context.Context) error {
epoch := b.layerClock.CurrentLayer().GetEpoch()
atx, err := atxs.GetIDByEpochAndNodeID(b.cdb, epoch, b.signer.NodeID())
if errors.Is(err, sql.ErrNotFound) {
return nil
} else if err != nil {
return err
}
blob, err := atxs.GetBlob(b.cdb, atx[:])
if err != nil {
return fmt.Errorf("get blob %s: %w", atx.ShortString(), err)
}
if len(blob) == 0 {
return nil // checkpoint
}
if err := b.publisher.Publish(ctx, pubsub.AtxProtocol, blob); err != nil {
return fmt.Errorf("republish %s: %w", atx.ShortString(), err)
}
b.log.With().Debug("regossipped atx", log.Context(ctx), log.ShortStringer("atx", atx))
return nil
}

// SignAndFinalizeAtx signs the atx with specified signer and calculates the ID of the ATX.
func SignAndFinalizeAtx(signer *signing.EdSigner, atx *types.ActivationTx) error {
atx.Signature = signer.Sign(signing.ATX, atx.SignedBytes())
Expand Down
30 changes: 30 additions & 0 deletions activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,36 @@ func TestWaitPositioningAtx(t *testing.T) {
}
}

func TestRegossip(t *testing.T) {
layer := types.LayerID(10)
t.Run("not found", func(t *testing.T) {
h := newTestBuilder(t)
h.mclock.EXPECT().CurrentLayer().Return(layer)
require.NoError(t, h.Regossip(context.Background()))
})
t.Run("success", func(t *testing.T) {
h := newTestBuilder(t)
atx := newActivationTx(t,
h.signer, 0, types.EmptyATXID, types.EmptyATXID, nil,
layer.GetEpoch(), 0, 1, types.Address{}, 1, &types.NIPost{})
require.NoError(t, atxs.Add(h.cdb.Database, atx))
blob, err := atxs.GetBlob(h.cdb.Database, atx.ID().Bytes())
require.NoError(t, err)
h.mclock.EXPECT().CurrentLayer().Return(layer)

ctx := context.Background()
h.mpub.EXPECT().Publish(ctx, pubsub.AtxProtocol, blob)
require.NoError(t, h.Regossip(ctx))
})
t.Run("checkpointed", func(t *testing.T) {
h := newTestBuilder(t)
require.NoError(t, atxs.AddCheckpointed(h.cdb.Database,
&atxs.CheckpointAtx{ID: types.ATXID{1}, Epoch: layer.GetEpoch(), SmesherID: h.sig.NodeID()}))
h.mclock.EXPECT().CurrentLayer().Return(layer)
require.NoError(t, h.Regossip(context.Background()))
})
}

func TestWaitingToBuildNipostChallengeWithJitter(t *testing.T) {
t.Run("before grace period", func(t *testing.T) {
// ┌──grace period──┐
Expand Down
6 changes: 6 additions & 0 deletions activation/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type atxChan struct {

// Handler processes the atxs received from all nodes and their validity status.
type Handler struct {
local p2p.Peer
cdb *datastore.CachedDB
edVerifier *signing.EdVerifier
clock layerClock
Expand All @@ -56,6 +57,7 @@ type Handler struct {

// NewHandler returns a data handler for ATX.
func NewHandler(
local p2p.Peer,
cdb *datastore.CachedDB,
edVerifier *signing.EdVerifier,
c layerClock,
Expand All @@ -70,6 +72,7 @@ func NewHandler(
poetCfg PoetConfig,
) *Handler {
return &Handler{
local: local,
cdb: cdb,
edVerifier: edVerifier,
clock: c,
Expand Down Expand Up @@ -492,6 +495,9 @@ func (h *Handler) HandleGossipAtx(ctx context.Context, peer p2p.Peer, msg []byte
log.Err(err),
)
}
if errors.Is(err, errKnownAtx) && peer == h.local {
return nil
}
return err
}

Expand Down
27 changes: 25 additions & 2 deletions activation/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ import (
"github.com/spacemeshos/go-spacemesh/system/mocks"
)

const layersPerEpochBig = 1000
const (
layersPerEpochBig = 1000
localID = "local"
)

func newMerkleProof(t testing.TB, challenge types.Hash32, otherLeafs []types.Hash32) (types.MerkleProof, types.Hash32) {
t.Helper()
Expand Down Expand Up @@ -101,7 +104,7 @@ func newTestHandler(tb testing.TB, goldenATXID types.ATXID) *testHandler {
mbeacon := NewMockAtxReceiver(ctrl)
mtortoise := mocks.NewMockTortoise(ctrl)

atxHdlr := NewHandler(cdb, verifier, mclock, mpub, mockFetch, 1, goldenATXID, mValidator, mbeacon, mtortoise, lg, PoetConfig{})
atxHdlr := NewHandler(localID, cdb, verifier, mclock, mpub, mockFetch, 1, goldenATXID, mValidator, mbeacon, mtortoise, lg, PoetConfig{})
return &testHandler{
Handler: atxHdlr,

Expand Down Expand Up @@ -996,6 +999,26 @@ func TestHandler_HandleSyncedAtx(t *testing.T) {

require.Error(t, atxHdlr.HandleGossipAtx(context.Background(), "", buf))
})
t.Run("known atx from local id is allowed", func(t *testing.T) {
t.Parallel()

atxHdlr := newTestHandler(t, goldenATXID)

atx := newActivationTx(t, sig, 0, types.EmptyATXID, types.EmptyATXID, nil, 0, 0, 0, types.Address{2, 4, 5}, 2, nil)

atxHdlr.mbeacon.EXPECT().OnAtx(gomock.Any())
atxHdlr.mtortoise.EXPECT().OnAtx(gomock.Any())
require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx))

buf, err := codec.Encode(atx)
require.NoError(t, err)

atxHdlr.mclock.EXPECT().LayerToTime(gomock.Any()).Return(time.Now())
require.NoError(t, atxHdlr.HandleSyncedAtx(context.Background(), atx.ID().Hash32(), p2p.NoPeer, buf))

atxHdlr.mclock.EXPECT().LayerToTime(gomock.Any()).Return(time.Now())
require.NoError(t, atxHdlr.HandleGossipAtx(context.Background(), localID, buf))
})

t.Run("atx with invalid signature", func(t *testing.T) {
t.Parallel()
Expand Down
10 changes: 5 additions & 5 deletions activation/nipost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,11 @@ func buildNIPost(tb testing.TB, postProvider *testPostManager, nipostChallenge t

epoch := layersPerEpoch * layerDuration
poetCfg := PoetConfig{
PhaseShift: epoch / 5,
CycleGap: epoch / 10,
GracePeriod: epoch / 10,
RequestTimeout: epoch / 10,
RequestRetryDelay: epoch / 100,
PhaseShift: epoch / 2,
CycleGap: epoch / 5,
GracePeriod: epoch / 5,
RequestTimeout: epoch / 5,
RequestRetryDelay: epoch / 50,
MaxRequestRetries: 10,
}

Expand Down
7 changes: 5 additions & 2 deletions beacon/beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,16 @@ func (pd *ProtocolDriver) setMetricsRegistry(registry *prometheus.Registry) {
// Start starts listening for layers and outputs.
func (pd *ProtocolDriver) Start(ctx context.Context) {
pd.startOnce.Do(func() {
pd.logger.With().Info("starting beacon protocol", log.String("config", fmt.Sprintf("%+v", pd.config)))
if pd.sync == nil {
pd.logger.Fatal("update sync state provider can't be nil")
}

pd.metricsCollector.Start(nil)

if pd.config.RoundsNumber == 0 {
pd.logger.Info("beacon protocol disabled")
return
}
pd.logger.With().Info("starting beacon protocol", log.String("config", fmt.Sprintf("%+v", pd.config)))
pd.setProposalTimeForNextEpoch()
pd.eg.Go(func() error {
pd.listenEpochs(ctx)
Expand Down
28 changes: 19 additions & 9 deletions blocks/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/spacemeshos/go-spacemesh/datastore"
"github.com/spacemeshos/go-spacemesh/events"
"github.com/spacemeshos/go-spacemesh/log"
"github.com/spacemeshos/go-spacemesh/proposals"
"github.com/spacemeshos/go-spacemesh/sql/ballots"
"github.com/spacemeshos/go-spacemesh/sql/layers"
"github.com/spacemeshos/go-spacemesh/sql/transactions"
"github.com/spacemeshos/go-spacemesh/txs"
Expand Down Expand Up @@ -225,16 +225,26 @@ func rewardInfoAndHeight(logger log.Log, cdb *datastore.CachedDB, cfg Config, pr
if atx.BaseTickHeight > max {
max = atx.BaseTickHeight
}
ballot := &p.Ballot
weightPer, err := proposals.ComputeWeightPerEligibility(cdb, ballot)
if err != nil {
logger.With().Error("failed to calculate weight per eligibility", p.ID(), log.Err(err))
return 0, nil, err
var count uint32
if p.Ballot.EpochData != nil {
count = p.Ballot.EpochData.EligibilityCount
} else {
ref, err := ballots.Get(cdb, p.RefBallot)
if err != nil {
return 0, nil, fmt.Errorf("get ballot %s: %w", p.RefBallot.String(), err)
}
if ref.EpochData == nil {
return 0, nil, fmt.Errorf("corrupted data: ref ballot %s with empty epoch data", p.RefBallot.String())
}
count = ref.EpochData.EligibilityCount
}
logger.With().Debug("weight per eligibility", p.ID(), log.Stringer("weight_per", weightPer))
actual := weightPer.Mul(weightPer, new(big.Rat).SetUint64(uint64(len(ballot.EligibilityProofs))))
if _, ok := weights[atx.ID]; !ok {
weights[atx.ID] = actual
weight := new(big.Rat).SetFrac(
new(big.Int).SetUint64(atx.GetWeight()),
new(big.Int).SetUint64(uint64(count)),
)
weight.Mul(weight, new(big.Rat).SetUint64(uint64(len(p.Ballot.EligibilityProofs))))
weights[atx.ID] = weight
atxids = append(atxids, atx.ID)
} else {
logger.With().Error("multiple proposals with the same ATX", atx.ID, p.ID())
Expand Down
1 change: 1 addition & 0 deletions checkpoint/recovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ func validateAndPreserveData(tb testing.TB, db *sql.Database, deps []*types.Veri
mtrtl := smocks.NewMockTortoise(ctrl)
cdb := datastore.NewCachedDB(db, lg)
atxHandler := activation.NewHandler(
"",
cdb,
edVerifier,
mclock,
Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ type BaseConfig struct {
// MinerGoodAtxsPercent is a threshold to decide if tortoise activeset should be
// picked from first block insted of synced data.
MinerGoodAtxsPercent int `mapstructure:"miner-good-atxs-percent"`

RegossipAtxInterval time.Duration `mapstructure:"regossip-atx-interval"`
}

type PublicMetrics struct {
Expand Down
17 changes: 16 additions & 1 deletion config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ func MainnetConfig() Config {
}
p2pconfig := p2p.DefaultConfig()

p2pconfig.Bootnodes = []string{
"/dns4/mainnet-bootnode-0.spacemesh.network/tcp/5000/p2p/12D3KooWPStnitMbLyWAGr32gHmPr538mT658Thp6zTUujZt3LRf",
"/dns4/mainnet-bootnode-2.spacemesh.network/tcp/5000/p2p/12D3KooWAsMgXLpyGdsRNjHBF3FaXwnXhyMEqWQYBXUpvCHNzFNK",
"/dns4/mainnet-bootnode-4.spacemesh.network/tcp/5000/p2p/12D3KooWRcTWDHzptnhJn5h6CtwnokzzMaDLcXv6oM9CxQEXd5FL",
"/dns4/mainnet-bootnode-6.spacemesh.network/tcp/5000/p2p/12D3KooWRS47KAs3ZLkBtE2AqjJCwxRYqZKmyLkvombJJdrca8Hz",
"/dns4/mainnet-bootnode-8.spacemesh.network/tcp/5000/p2p/12D3KooWFYv99aGbtXnZQy6UZxyf72NpkWJp3K4HS8Py35WhKtzE",
"/dns4/mainnet-bootnode-10.spacemesh.network/tcp/5000/p2p/12D3KooWHK5m83sNj2eNMJMGAngcS9gBja27ho83t79Q2CD4iRjQ",
"/dns4/mainnet-bootnode-12.spacemesh.network/tcp/5000/p2p/12D3KooWG4gk8GtMsAjYxHtbNC7oEoBTMRLbLDpKgSQMQkYBFRsw",
"/dns4/mainnet-bootnode-14.spacemesh.network/tcp/5000/p2p/12D3KooWRkZMjGNrQfRyeKQC9U58cUwAfyQMtjNsupixkBFag8AY",
"/dns4/mainnet-bootnode-16.spacemesh.network/tcp/5000/p2p/12D3KooWDAFRuFrMNgVQMDy8cgD71GLtPyYyfQzFxMZr2yUBgjHK",
"/dns4/mainnet-bootnode-18.spacemesh.network/tcp/5000/p2p/12D3KooWMJmdfwxDctuGGoTYJD8Wj9jubQBbPfrgrzzXaQ1RTKE6",
}

smeshing := DefaultSmeshingConfig()
smeshing.ProvingOpts.Nonces = 288
smeshing.ProvingOpts.Threads = uint(runtime.NumCPU() * 3 / 4)
Expand Down Expand Up @@ -65,7 +78,9 @@ func MainnetConfig() Config {
"https://mainnet-poet-2.spacemesh.network",
"https://poet-110.spacemesh.network",
"https://poet-111.spacemesh.network",
"https://poet-112.spacemesh.network",
},
RegossipAtxInterval: 2 * time.Hour,
},
Genesis: &GenesisConfig{
GenesisTime: "2023-07-14T08:00:00Z",
Expand Down Expand Up @@ -100,7 +115,7 @@ func MainnetConfig() Config {
GracePeriodDuration: 10 * time.Minute,
ProposalDuration: 4 * time.Minute,
FirstVotingRoundDuration: 30 * time.Minute,
RoundsNumber: 300,
RoundsNumber: 0,
VotingRoundDuration: 4 * time.Minute,
WeakCoinRoundDuration: 4 * time.Minute,
VotesLimit: 100,
Expand Down
1 change: 1 addition & 0 deletions config/presets/fastnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func fastnet() config.Config {
conf.Sync.Interval = 5 * time.Second
conf.Sync.GossipDuration = 10 * time.Second
conf.LayersPerEpoch = 4
conf.RegossipAtxInterval = 30 * time.Second

conf.Tortoise.Hdist = 4
conf.Tortoise.Zdist = 2
Expand Down
14 changes: 8 additions & 6 deletions config/presets/testnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ func testnet() config.Config {

OptFilterThreshold: 90,

TickSize: 666514,
PoETServers: []string{},
TickSize: 666514,
PoETServers: []string{},
RegossipAtxInterval: time.Hour,
},
Genesis: &config.GenesisConfig{
GenesisTime: "2023-09-13T18:00:00Z",
Expand Down Expand Up @@ -135,10 +136,11 @@ func testnet() config.Config {
FETCH: fetch.DefaultConfig(),
LOGGING: config.DefaultLoggingConfig(),
Sync: syncer.Config{
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
GossipDuration: 50 * time.Second,
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
GossipDuration: 50 * time.Second,
OutOfSyncThresholdLayers: 10,
},
Recovery: checkpoint.DefaultConfig(),
Cache: datastore.DefaultConfig(),
Expand Down

0 comments on commit 9a54227

Please sign in to comment.