Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - sync: parametrize out of sync threshold and set it to 3h for mainnet #5040

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 6 additions & 5 deletions config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,12 @@ func MainnetConfig() Config {
FETCH: fetch.DefaultConfig(),
LOGGING: logging,
Sync: syncer.Config{
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
Standalone: false,
GossipDuration: 50 * time.Second,
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
Standalone: false,
GossipDuration: 50 * time.Second,
OutOfSyncThreshold: 36, // in layers; 36 layers corresponds to 3h on mainnet
dshulyak marked this conversation as resolved.
Show resolved Hide resolved
},
Recovery: checkpoint.DefaultConfig(),
Cache: datastore.DefaultConfig(),
Expand Down
38 changes: 18 additions & 20 deletions syncer/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,29 @@ import (

// Config is the config params for syncer.
type Config struct {
Interval time.Duration
EpochEndFraction float64
HareDelayLayers uint32
SyncCertDistance uint32
MaxStaleDuration time.Duration
Standalone bool
GossipDuration time.Duration
Interval time.Duration
EpochEndFraction float64
HareDelayLayers uint32
SyncCertDistance uint32
MaxStaleDuration time.Duration
Standalone bool
GossipDuration time.Duration
OutOfSyncThreshold uint32 `mapstructure:"out-of-sync-threshold"`
}

// DefaultConfig for the syncer.
func DefaultConfig() Config {
return Config{
Interval: 10 * time.Second,
EpochEndFraction: 0.8,
HareDelayLayers: 10,
SyncCertDistance: 10,
MaxStaleDuration: time.Second,
GossipDuration: 15 * time.Second,
Interval: 10 * time.Second,
EpochEndFraction: 0.8,
HareDelayLayers: 10,
SyncCertDistance: 10,
MaxStaleDuration: time.Second,
GossipDuration: 15 * time.Second,
OutOfSyncThreshold: 3,
}
}

const (
outOfSyncThreshold uint32 = 3 // see notSynced
)

type syncState uint32

const (
Expand Down Expand Up @@ -452,7 +450,7 @@ func (s *Syncer) syncAtx(ctx context.Context) error {
return nil
}

func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID) bool {
func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID, outOfSyncThreshold uint32) bool {
if current.After(lastSynced) && current.Difference(lastSynced) >= outOfSyncThreshold {
logger.WithContext(ctx).With().Info("node is too far behind",
log.Stringer("current", current),
Expand All @@ -472,7 +470,7 @@ func (s *Syncer) setStateBeforeSync(ctx context.Context) {
}
return
}
if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) {
if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThreshold) {
s.setSyncState(ctx, notSynced)
}
}
Expand All @@ -492,7 +490,7 @@ func (s *Syncer) setStateAfterSync(ctx context.Context, success bool) {
// network outage.
switch currSyncState {
case synced:
if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) {
if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThreshold) {
s.setSyncState(ctx, notSynced)
}
case gossipSync:
Expand Down
15 changes: 9 additions & 6 deletions syncer/syncer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
const (
layersPerEpoch = 3
never = time.Second * 60 * 24

outOfSyncThreshold = 3
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -93,11 +95,12 @@ func newTestSyncer(t *testing.T, interval time.Duration) *testSyncer {
require.NoError(t, err)

cfg := Config{
Interval: interval,
GossipDuration: 5 * time.Millisecond,
EpochEndFraction: 0.66,
SyncCertDistance: 4,
HareDelayLayers: 5,
Interval: interval,
GossipDuration: 5 * time.Millisecond,
EpochEndFraction: 0.66,
SyncCertDistance: 4,
HareDelayLayers: 5,
OutOfSyncThreshold: outOfSyncThreshold,
}
ts.syncer = NewSyncer(ts.cdb, ts.mTicker, ts.mBeacon, ts.msh, nil, nil, ts.mLyrPatrol, ts.mCertHdr,
WithConfig(cfg),
Expand Down Expand Up @@ -535,7 +538,7 @@ func TestNetworkHasNoData(t *testing.T) {
require.True(t, ts.syncer.IsSynced(context.Background()))
}
// the network hasn't received any data
require.Greater(t, ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer(), outOfSyncThreshold)
require.Greater(t, int(ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer()), outOfSyncThreshold)
}

// test the case where the node was originally synced, and somehow gets out of sync, but
Expand Down