Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Buf sync: support module identity overrides #2356

Merged
merged 12 commits into from
Aug 9, 2023
68 changes: 53 additions & 15 deletions private/buf/bufsync/bufsync.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,57 @@ type ReadModuleError struct {
moduleDir string
}

// Code returns the error code for this read module error.
func (e *ReadModuleError) Code() ReadModuleErrorCode {
return e.code
}

// Code returns the module directory in which this error code was thrown.
func (e *ReadModuleError) ModuleDir() string {
return e.moduleDir
}

func (e *ReadModuleError) Error() string {
return fmt.Sprintf(
"read module in branch %s, commit %s, directory %s: %s",
e.branch, e.commit, e.moduleDir, e.err.Error(),
)
}

const (
// LookbackDecisionCodeSkip instructs the syncer to skip the commit that threw the read module
// error, and keep looking back.
LookbackDecisionCodeSkip = iota + 1
// LookbackDecisionCodeOverride instructs the syncer to use the read module and override its
// identity with the target module identity for that directory, read either from the branch's HEAD
// commit, or the passed module identity override in the command.
LookbackDecisionCodeOverride
// LookbackDecisionCodeStop instructs the syncer to stop looking back when finding the read module
// error, and use the previous commit (if any) as the start sync point.
LookbackDecisionCodeStop
// LookbackDecisionCodeFail instructs the syncer to fail the lookback process for the branch,
// effectively failing the sync process.
LookbackDecisionCodeFail
)

// LookbackDecisionCode is the decision made by the ErrorHandler when finding a commit that throws
// an error reading a module.
type LookbackDecisionCode int

// ErrorHandler handles errors reported by the Syncer before or during the sync process.
type ErrorHandler interface {
// StopLookback is invoked when deciding on a git start sync point.
// HandleReadModuleError is invoked when navigating a branch from HEAD and seeing an error reading
// a module.
//
// For each branch to be synced, the Syncer travels back from HEAD looking for modules in the
// given module directories, until finding a commit that is already synced to the BSR, or the
// beginning of the Git repository.
//
// The syncer might find errors trying to read a module in that directory. Those errors are sent
// to this function to decide if the Syncer should stop looking back or not, and choose the
// previous one (if any) as the start sync point.
// to this function to know what to do on those commits.
//
// decide if the Syncer should stop looking back or not, and choose the previous one (if any) as
// the start sync point.
//
// e.g.: The git commits in topological order are: `a -> ... -> z (HEAD)`, and the modules on a
// given module directory are:
Expand All @@ -94,14 +127,14 @@ type ErrorHandler interface {
// s | buf.build/acme/foo | Y | same as HEAD
// r | buf.build/acme/foo | N | already synced to the BSR
//
// If this func returns 'true' for any `ReadModuleErrorCode`, then the syncer will stop looking
// when reaching the commit `r` because it already exists in the BSR, select `s` as the start sync
// point, and the synced commits into the BSR will be [s, t, x, y, z].
// If this func returns `LookbackDecisionCodeSkip` for any `ReadModuleErrorCode`, then the syncer
// will stop looking when reaching the commit `r` because it already exists in the BSR, select `s`
// as the start sync point, and the synced commits into the BSR will be [s, t, x, y, z].
//
// On the other hand, if this func returns true for `ReadModuleErrorCodeModuleNotFound`, the
// If this func returns `LookbackDecisionCodeStop` for `ReadModuleErrorCodeModuleNotFound`, the
// syncer will stop looking when reaching the commit `u`, will select `v` as the start sync point,
// and the synced commits into the BSR will be [x, y, z].
StopLookback(err *ReadModuleError) bool
HandleReadModuleError(err *ReadModuleError) LookbackDecisionCode
// InvalidRemoteSyncPoint is invoked by Syncer upon encountering a module's branch sync point that
// is invalid locally. A typical example is either a sync point that points to a commit that
// cannot be found anymore, or the commit itself has been corrupted.
Expand Down Expand Up @@ -148,19 +181,24 @@ func NewSyncer(
// SyncerOption configures the creation of a new Syncer.
type SyncerOption func(*syncer) error

// SyncerWithModuleDirectory configures a Syncer to sync a module in the specified module directory.
// SyncerWithModule configures a Syncer to sync a module in the specified module directory, with an
// optional module override.
//
// If a not-nil module identity is passed, it will be used as the expected module target for the
// module directory. On the other hand, if a nil module identity is passed, then the module identity
// target for that module directory is read from the HEAD commit on each git branch.
//
// This option can be provided multiple times to sync multiple distinct modules. The order in which
// the module directories are passed is preserved, and those modules are synced in the same order.
// If the same module directory is passed multiple times this option errors, since the order cannot
// be preserved anymore.
func SyncerWithModuleDirectory(moduleDir string) SyncerOption {
// the module are passed is preserved, and those modules are synced in the same order. If the same
// module directory is passed multiple times this option errors, since the order cannot be preserved
// anymore.
func SyncerWithModule(moduleDir string, identityOverride bufmoduleref.ModuleIdentity) SyncerOption {
return func(s *syncer) error {
moduleDir = normalpath.Normalize(moduleDir)
if _, alreadyAdded := s.modulesDirsForSync[moduleDir]; alreadyAdded {
if _, alreadyAdded := s.modulesDirsToIdentityOverrideForSync[moduleDir]; alreadyAdded {
return fmt.Errorf("module directory %s already added", moduleDir)
}
s.modulesDirsForSync[moduleDir] = struct{}{}
s.modulesDirsToIdentityOverrideForSync[moduleDir] = identityOverride
s.sortedModulesDirsForSync = append(s.sortedModulesDirsForSync, moduleDir)
return nil
}
Expand Down
116 changes: 72 additions & 44 deletions private/buf/bufsync/commits_to_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package bufsync
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
Expand All @@ -36,31 +37,15 @@ import (

func TestCommitsToSyncWithNoPreviousSyncPoints(t *testing.T) {
t.Parallel()
// share git repo, bsr checker and modules to sync for all the test scenarios, as a regular `buf
// sync` run would do.
mockBSRChecker := newMockSyncGitChecker()
moduleIdentityInHEAD, err := bufmoduleref.NewModuleIdentity("buf.build", "acme", "foo")
require.NoError(t, err)
moduleIdentityOverride, err := bufmoduleref.NewModuleIdentity("buf.build", "acme", "bar")
require.NoError(t, err)
// scaffoldGitRepository returns a repo with the following commits:
// | o-o----------o-----------------o (main)
// | └o-o (foo) └o--------o (bar)
// | └o (baz)
repo := scaffoldGitRepository(t, moduleIdentityInHEAD)
testSyncer := syncer{
repo: repo,
storageGitProvider: storagegit.NewProvider(repo.Objects()),
logger: zaptest.NewLogger(t),
modulesDirsForSync: map[string]struct{}{".": {}},
sortedModulesDirsForSync: []string{"."},
syncAllBranches: true,
syncedGitCommitChecker: mockBSRChecker.checkFunc(),
commitsToTags: make(map[string][]string),
branchesToModulesForSync: make(map[string]map[string]bufmoduleref.ModuleIdentity),
modulesToBranchesLastSyncPoints: make(map[string]map[string]string),
modulesIdentitiesToCommitsSyncedCache: make(map[string]map[string]struct{}),
}
require.NoError(t, testSyncer.prepareSync(context.Background()))

type testCase struct {
name string
branch string
Expand Down Expand Up @@ -88,37 +73,80 @@ func TestCommitsToSyncWithNoPreviousSyncPoints(t *testing.T) {
expectedCommits: 2, // counting the commit that branches off bar
},
}
for _, tc := range testCases {
func(tc testCase) {
t.Run(tc.name, func(t *testing.T) {
syncableCommits, err := testSyncer.branchSyncableCommits(
context.Background(),
tc.branch,
)
// uncomment for debug purposes
// s.printCommitsToSync(tc.branch, syncableCommits)
require.NoError(t, err)
require.Len(t, syncableCommits, tc.expectedCommits)
for i, syncableCommit := range syncableCommits {
assert.NotEmpty(t, syncableCommit.commit.Hash().Hex())
mockBSRChecker.markSynced(syncableCommit.commit.Hash().Hex())
if tc.branch != "main" && i == 0 {
// first commit in non-default branches will come with no modules to sync, because it's
// the commit in which it branches off the parent branch.
assert.Empty(t, syncableCommit.modules)
} else {
assert.Len(t, syncableCommit.modules, 1)
for moduleDir, builtModule := range syncableCommit.modules {
assert.Equal(t, ".", moduleDir)
assert.Equal(t, moduleIdentityInHEAD.IdentityString(), builtModule.ModuleIdentity().IdentityString())
for _, withOverride := range []bool{false, true} {
mockBSRChecker := newMockSyncGitChecker()
for _, tc := range testCases {
func(tc testCase) {
t.Run(fmt.Sprintf("%s/override_%t", tc.name, withOverride), func(t *testing.T) {
moduleDirsForSync := []string{"."}
moduleDirsToIdentity := make(map[string]bufmoduleref.ModuleIdentity)
for _, moduleDir := range moduleDirsForSync {
if withOverride {
moduleDirsToIdentity[moduleDir] = moduleIdentityOverride
} else {
moduleDirsToIdentity[moduleDir] = nil
}
}
testSyncer := syncer{
repo: repo,
storageGitProvider: storagegit.NewProvider(repo.Objects()),
logger: zaptest.NewLogger(t),
errorHandler: &mockErrorHandler{},
modulesDirsToIdentityOverrideForSync: moduleDirsToIdentity,
sortedModulesDirsForSync: moduleDirsForSync,
syncAllBranches: true,
syncedGitCommitChecker: mockBSRChecker.checkFunc(),
commitsToTags: make(map[string][]string),
branchesToModulesForSync: make(map[string]map[string]bufmoduleref.ModuleIdentity),
modulesToBranchesLastSyncPoints: make(map[string]map[string]string),
modulesIdentitiesToCommitsSyncedCache: make(map[string]map[string]struct{}),
}
require.NoError(t, testSyncer.prepareSync(context.Background()))
syncableCommits, err := testSyncer.branchSyncableCommits(
context.Background(),
tc.branch,
)
// uncomment for debug purposes
// s.printCommitsToSync(tc.branch, syncableCommits)
require.NoError(t, err)
require.Len(t, syncableCommits, tc.expectedCommits)
for i, syncableCommit := range syncableCommits {
assert.NotEmpty(t, syncableCommit.commit.Hash().Hex())
mockBSRChecker.markSynced(syncableCommit.commit.Hash().Hex())
if tc.branch != "main" && i == 0 {
// first commit in non-default branches will come with no modules to sync, because it's
// the commit in which it branches off the parent branch.
assert.Empty(t, syncableCommit.modules)
} else {
assert.Len(t, syncableCommit.modules, 1)
for moduleDir, builtModule := range syncableCommit.modules {
assert.Equal(t, ".", moduleDir)
if withOverride {
assert.Equal(t, moduleIdentityOverride.IdentityString(), builtModule.ModuleIdentity().IdentityString())
} else {
assert.Equal(t, moduleIdentityInHEAD.IdentityString(), builtModule.ModuleIdentity().IdentityString())
}
}
}
}
}
})
}(tc)
})
}(tc)
}
}
}

type mockErrorHandler struct{}

func (*mockErrorHandler) HandleReadModuleError(*ReadModuleError) LookbackDecisionCode {
// we know this test don't contain any hard read module error, for the sake of the test let's
// always override
return LookbackDecisionCodeOverride
}

func (*mockErrorHandler) InvalidRemoteSyncPoint(bufmoduleref.ModuleIdentity, string, git.Hash, bool, error) error {
return errors.New("unimplemented")
}

type mockSyncedGitChecker struct {
syncedCommitsSHAs map[string]struct{}
}
Expand Down