Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Git-backed storage.ReadBucket via storagegit #2114

Merged
merged 18 commits into from
May 25, 2023
Merged
53 changes: 26 additions & 27 deletions private/pkg/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,30 @@ import (
)

const (
// DotGitDir is a relative path to the `.git` directory.
DotGitDir = ".git"

// ModeUnknown is a mode's zero value.
ModeUnknown FileMode = 0
// ModeFile is a blob that should be written as a plain file.
ModeFile FileMode = 010_0644
// ModeExec is a blob that should be written with the executable bit set.
ModeExe FileMode = 010_0755
// ModeDir is a tree to be unpacked as a subdirectory in the current
// directory.
ModeDir FileMode = 004_0000
// ModeSymlink is a blob with its content being the path linked to.
ModeSymlink FileMode = 012_0000
// ModeSubmodule is a commit that the submodule is checked out at.
ModeSubmodule FileMode = 016_0000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the left padded zeroes in all of them? I think at least one zero can be removed in all of them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, these are octal literals :)

)

var ErrSubTreeNotFound = errors.New("subtree not found")

// FileMode is how to interpret a tree entry's object. See the Mode* constants
// for how to interpret each mode value.
type FileMode uint32
saquibmian marked this conversation as resolved.
Show resolved Hide resolved

// Name is a name identifiable by git.
type Name interface {
// If cloneBranch returns a non-empty string, any clones will be performed with --branch set to the value.
Expand Down Expand Up @@ -273,44 +294,22 @@ func OpenObjectReader(
return newObjectReader(gitDirPath, runner)
}

const (
// ModeUnknown is a mode's zero value.
ModeUnknown FileMode = 0
// ModeFile is a blob that should be written as a plain file.
ModeFile FileMode = 010_0644
// ModeExec is a blob that should be written with the executable bit set.
ModeExe FileMode = 010_0755
// ModeDir is a tree to be unpacked as a subdirectory in the current
// directory.
ModeDir FileMode = 004_0000
// ModeSymlink is a blob with its content being the path linked to.
ModeSymlink FileMode = 012_0000
// ModeSubmodule is a commit that the submodule is checked out at.
ModeSubmodule FileMode = 016_0000
)

// FileMode is how to interpret a tree entry's object. See the Mode* constants
// for how to interpret each mode value.
type FileMode uint32

var ErrSubTreeNotFound = errors.New("subtree not found")

// Tree is a git tree, which are a manifest of other git objects, including other trees.
type Tree interface {
// Hash is the Hash for this Tree.
Hash() Hash
// Entries is the set of entries in this Tree.
Entries() []TreeEntry
// Nodes is the set of nodes in this Tree.
Nodes() []Node
// Traverse walks down a tree, following the name-path specified,
// and returns the terminal TreeEntry. If no entry is found, it returns
// ErrSubTreeNotFound.
Traverse(objectReader ObjectReader, names ...string) (TreeEntry, error)
Traverse(objectReader ObjectReader, names ...string) (Node, error)
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}

// TreeEntry is a reference to an object contained in a tree. These objects have
// Node is a reference to an object contained in a tree. These objects have
// a file mode associated with them, which hints at the type of object located
// at ID (tree or blob).
type TreeEntry interface {
type Node interface {
// Hash is the Hash of the object referenced by this TreeEntry.
Hash() Hash
// Name is the name of the object referenced by this TreeEntry.
Expand Down
38 changes: 19 additions & 19 deletions private/pkg/git/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,8 @@ import (
)

type tree struct {
hash Hash
entries []TreeEntry
}

func (t *tree) Hash() Hash {
return t.hash
}

func (t *tree) Entries() []TreeEntry {
return t.entries
}

func (t *tree) Traverse(objectReader ObjectReader, names ...string) (TreeEntry, error) {
return traverse(objectReader, t, names...)
hash Hash
nodes []Node
}

func parseTree(hash Hash, data []byte) (*tree, error) {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -47,26 +35,38 @@ func parseTree(hash Hash, data []byte) (*tree, error) {
return nil, errors.New("malformed tree")
}
length := i + 1 + hashLength
entry, err := parseTreeEntry(data[:length])
entry, err := parseTreeNode(data[:length])
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, fmt.Errorf("malformed tree: %w", err)
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}
t.entries = append(t.entries, entry)
t.nodes = append(t.nodes, entry)
data = data[length:]
}
return t, nil
}

func (t *tree) Hash() Hash {
return t.hash
}

func (t *tree) Nodes() []Node {
return t.nodes
}

func (t *tree) Traverse(objectReader ObjectReader, names ...string) (Node, error) {
return traverse(objectReader, t, names...)
}

func traverse(
objectReader ObjectReader,
root Tree,
names ...string,
) (TreeEntry, error) {
) (Node, error) {
name := names[0]
names = names[1:]
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
// Find name in this tree.
var found TreeEntry
for _, entry := range root.Entries() {
var found Node
for _, entry := range root.Nodes() {
if entry.Name() == name {
found = entry
break
Expand Down
38 changes: 19 additions & 19 deletions private/pkg/git/tree_entry.go → private/pkg/git/tree_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,48 @@ import (
"strconv"
)

type treeEntry struct {
type treeNode struct {
name string
mode FileMode
hash Hash
}

func (e *treeEntry) Name() string {
return e.name
}

func (e *treeEntry) Mode() FileMode {
return e.mode
}

func (e *treeEntry) Hash() Hash {
return e.hash
}

func parseTreeEntry(data []byte) (*treeEntry, error) {
func parseTreeNode(data []byte) (*treeNode, error) {
modeAndName, hash, found := bytes.Cut(data, []byte{0})
if !found {
return nil, errors.New("malformed entry")
return nil, errors.New("malformed node")
}
parsedHash, err := newHashFromBytes(hash)
if err != nil {
return nil, fmt.Errorf("malformed git tree entry: %w", err)
return nil, fmt.Errorf("malformed git tree node: %w", err)
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}
mode, name, found := bytes.Cut(modeAndName, []byte{' '})
if !found {
return nil, errors.New("malformed entry")
return nil, errors.New("malformed node")
}
parsedFileMode, err := parseFileMode(mode)
if err != nil {
return nil, fmt.Errorf("malformed git tree entry: %w", err)
return nil, fmt.Errorf("malformed git tree node: %w", err)
}
return &treeEntry{
return &treeNode{
hash: parsedHash,
name: string(name),
mode: parsedFileMode,
}, nil
}

func (e *treeNode) Name() string {
return e.name
}

func (e *treeNode) Mode() FileMode {
return e.mode
}

func (e *treeNode) Hash() Hash {
return e.hash
}

// decodes the octal form of a file mode into one of the valid Mode* values.
func parseFileMode(data []byte) (FileMode, error) {
mode, err := strconv.ParseUint(string(data), 8, 32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
)

func TestParseFileMode(t *testing.T) {
t.Parallel()
tests := []struct {
desc string
mode FileMode
Expand Down
21 changes: 11 additions & 10 deletions private/pkg/git/tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
)

func TestParseTree(t *testing.T) {
t.Parallel()
/*
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
This is generated using the following procedure:
```sh
Expand All @@ -43,14 +44,14 @@ func TestParseTree(t *testing.T) {

assert.NoError(t, err)
assert.Equal(t, tree.Hash(), hash)
assert.Len(t, tree.Entries(), 3)
assert.Equal(t, tree.Entries()[0].Hash().Hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
assert.Equal(t, tree.Entries()[0].Name(), "a.proto")
assert.Equal(t, tree.Entries()[0].Mode(), ModeFile)
assert.Equal(t, tree.Entries()[1].Hash().Hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
assert.Equal(t, tree.Entries()[1].Name(), "b")
assert.Equal(t, tree.Entries()[1].Mode(), ModeExe)
assert.Equal(t, tree.Entries()[2].Hash().Hex(), "5c4c3b5f86fa2060081873bf2f06973dd139832b")
assert.Equal(t, tree.Entries()[2].Name(), "c")
assert.Equal(t, tree.Entries()[2].Mode(), ModeDir)
assert.Len(t, tree.Nodes(), 3)
assert.Equal(t, tree.Nodes()[0].Hash().Hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
assert.Equal(t, tree.Nodes()[0].Name(), "a.proto")
assert.Equal(t, tree.Nodes()[0].Mode(), ModeFile)
assert.Equal(t, tree.Nodes()[1].Hash().Hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
assert.Equal(t, tree.Nodes()[1].Name(), "b")
assert.Equal(t, tree.Nodes()[1].Mode(), ModeExe)
assert.Equal(t, tree.Nodes()[2].Hash().Hex(), "5c4c3b5f86fa2060081873bf2f06973dd139832b")
assert.Equal(t, tree.Nodes()[2].Name(), "c")
assert.Equal(t, tree.Nodes()[2].Mode(), ModeDir)
unmultimedio marked this conversation as resolved.
Show resolved Hide resolved
}
8 changes: 4 additions & 4 deletions private/pkg/storage/storagegit/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (b *bucket) Stat(ctx context.Context, path string) (storage.ObjectInfo, err

func (b *bucket) Walk(ctx context.Context, prefix string, f func(storage.ObjectInfo) error) error {
walkChecker := storageutil.NewWalkChecker()
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
return b.walk(b.root, b.objectReader, prefix, func(path string, te git.TreeEntry) error {
return b.walk(b.root, b.objectReader, prefix, func(path string, te git.Node) error {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
if err := walkChecker.Check(ctx); err != nil {
return err
}
Expand All @@ -107,7 +107,7 @@ func (b *bucket) walk(
root git.Tree,
objectReader git.ObjectReader,
prefix string,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that partial prefixes won't work then, so for a valid dir foo/bar/baz/*, the prefix foo/b won't walk anything. Unsure how other buckets handle this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct, that's be design for buckets. The prefix is a segment prefix, but a path key prefix.

walkFn func(string, git.TreeEntry) error,
walkFn func(string, git.Node) error,
) error {
prefix = normalpath.Normalize(prefix)
if prefix != "." {
Expand All @@ -131,9 +131,9 @@ func walkTree(
root git.Tree,
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
objectReader git.ObjectReader,
prefix string,
walkFn func(string, git.TreeEntry) error,
walkFn func(string, git.Node) error,
) error {
for _, entry := range root.Entries() {
for _, entry := range root.Nodes() {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
path := normalpath.Join(prefix, entry.Name())
switch entry.Mode() {
case git.ModeFile, git.ModeExe, git.ModeSymlink:
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
2 changes: 1 addition & 1 deletion private/pkg/storage/storagegit/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func newProvider(objectReader git.ObjectReader) *provider {
}
}

func (p *provider) NewReadBucketForTreeHash(treeHash git.Hash) (storage.ReadBucket, error) {
func (p *provider) NewReadBucket(treeHash git.Hash) (storage.ReadBucket, error) {
tree, err := p.objectReader.Tree(treeHash)
if err != nil {
return nil, err
Expand Down
4 changes: 2 additions & 2 deletions private/pkg/storage/storagegit/storagegit.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ import (

// Provider provides storage buckets for a git repository.
type Provider interface {
// NewReadBucketForTreeHash returns a new ReadBucket that represents
// NewReadBucket returns a new ReadBucket that represents
// the state of the working tree at the particular tree.
//
// Typically, callers will want to source the tree from a commit, but
// they can also use a subtree of another tree.
NewReadBucketForTreeHash(hash git.Hash) (storage.ReadBucket, error)
NewReadBucket(hash git.Hash) (storage.ReadBucket, error)
}

// NewProvider creates a new Provider for a git repository.
Expand Down
2 changes: 1 addition & 1 deletion private/pkg/storage/storagegit/storagegit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func TestNewBucketAtTreeHash(t *testing.T) {
return nil
}))
require.NotNil(t, commit)
bucket, err := provider.NewReadBucketForTreeHash(commit.Tree())
bucket, err := provider.NewReadBucket(commit.Tree())
require.NoError(t, err)

storagetesting.AssertPaths(
Expand Down