Skip to content

Commit

Permalink
Add a Git-backed storage.ReadBucket via storagegit (#2114)
Browse files Browse the repository at this point in the history
  • Loading branch information
saquibmian committed May 25, 2023
1 parent fb4875e commit 45c2edc
Show file tree
Hide file tree
Showing 20 changed files with 890 additions and 14 deletions.
2 changes: 2 additions & 0 deletions private/pkg/git/branch_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
)

func TestBranches(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var branches []string
err := repo.BranchIterator.ForEachBranch(func(branch string) error {
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/commit_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
)

func TestCommits(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var commits []git.Commit
err := repo.CommitIterator.ForEachCommit(gittest.DefaultBranch, func(c git.Commit) error {
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
)

func TestParseCommit(t *testing.T) {
t.Parallel()

hash, err := parseHashFromHex("43848150a6f5f6d76eeef6e0f69eb46290eefab6")
require.NoError(t, err)
commit, err := parseCommit(
Expand Down
49 changes: 49 additions & 0 deletions private/pkg/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,30 @@ import (
)

const (
// DotGitDir is a relative path to the `.git` directory.
DotGitDir = ".git"

// The Mode* values below are written as octal literals.

// ModeUnknown is a mode's zero value.
ModeUnknown ObjectMode = 0
// ModeFile is a blob that should be written as a plain file.
ModeFile ObjectMode = 010_0644
// ModeExe is a blob that should be written with the executable bit set.
ModeExe ObjectMode = 010_0755
// ModeDir is a tree to be unpacked as a subdirectory in the current
// directory.
ModeDir ObjectMode = 004_0000
// ModeSymlink is a blob with its content being the path linked to.
ModeSymlink ObjectMode = 012_0000
// ModeSubmodule is a commit that the submodule is checked out at.
ModeSubmodule ObjectMode = 016_0000
)

// ErrTreeNodeNotFound is returned by Tree.Descendant when no node exists at
// the requested path.
var ErrTreeNodeNotFound = errors.New("node not found")

// ObjectMode is how to interpret a tree node's object. See the Mode* constants
// for how to interpret each mode value.
type ObjectMode uint32

// Name is a name identifiable by git.
type Name interface {
// If cloneBranch returns a non-empty string, any clones will be performed with --branch set to the value.
Expand Down Expand Up @@ -280,8 +301,12 @@ type AnnotatedTag interface {

// ObjectReader reads objects (commits, trees, blobs) from a `.git` directory.
type ObjectReader interface {
// Blob reads the blob identified by the hash.
Blob(id Hash) ([]byte, error)
// Commit reads the commit identified by the hash.
Commit(id Hash) (Commit, error)
// Tree reads the tree identified by the hash.
Tree(id Hash) (Tree, error)
// Tag reads the tag identified by the hash.
Tag(id Hash) (AnnotatedTag, error)
// Close closes the reader.
Expand All @@ -300,3 +325,27 @@ func OpenObjectReader(
) (ObjectReader, error) {
return newObjectReader(gitDirPath, runner)
}

// Tree is a git tree, which is a manifest of other git objects, including
// other trees.
type Tree interface {
// Hash is the Hash for this Tree.
Hash() Hash
// Nodes is the set of nodes in this Tree.
Nodes() []TreeNode
// Descendant walks down a tree, following the path specified,
// and returns the terminal Node. If no node is found, it returns
// ErrTreeNodeNotFound.
//
// The objectReader is used to read the intermediate trees along the path.
Descendant(path string, objectReader ObjectReader) (TreeNode, error)
}

// TreeNode is a reference to an object contained in a tree. These objects have
// a file mode associated with them, which hints at the type of object located
// at Hash (tree or blob).
type TreeNode interface {
// Hash is the Hash of the object referenced by this Node.
Hash() Hash
// Name is the name of the object referenced by this Node.
Name() string
// Mode is the file mode of the object referenced by this Node. See the
// Mode* constants for the possible values.
Mode() ObjectMode
}
33 changes: 24 additions & 9 deletions private/pkg/git/gittest/gittest.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io"
"os"
"path"
"path/filepath"
"strings"
"testing"

Expand Down Expand Up @@ -107,30 +108,37 @@ func scaffoldGitRepository(t *testing.T, runner command.Runner) string {
runInDir(t, runner, local, "git", "remote", "add", "origin", remote)

// (1) commit in main branch
runInDir(t, runner, local, "touch", "randomBinary")
writeFiles(t, local, map[string]string{
"randomBinary": "some executable",
"proto/buf.yaml": "some buf.yaml",
"proto/acme/petstore/v1/a.proto": "cats",
"proto/acme/petstore/v1/b.proto": "animals",
"proto/acme/grocerystore/v1/c.proto": "toysrus",
"proto/acme/grocerystore/v1/d.proto": "petsrus",
})
runInDir(t, runner, local, "chmod", "+x", "randomBinary")
runInDir(t, runner, local, "mkdir", "proto")
runInDir(t, runner, path.Join(local, "proto"), "touch", "buf.yaml")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/petstore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "a.proto", "b.proto")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/grocerystore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "c.proto", "d.proto")
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "initial commit")
runInDir(t, runner, local, "git", "tag", "release/v1")
runInDir(t, runner, local, "git", "push", "--follow-tags", "-u", "-f", "origin", DefaultBranch)

// (2) branch off main and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "e.proto", "f.proto")
writeFiles(t, local, map[string]string{
"proto/acme/petstore/v1/e.proto": "loblaws",
"proto/acme/petstore/v1/f.proto": "merchant of venice",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch1")
runInDir(t, runner, local, "git", "tag", "-m", "for testing", "branch/v1")
runInDir(t, runner, local, "git", "push", "--follow-tags", "origin", "smian/branch1")

// (3) branch off branch and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch2")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "g.proto", "h.proto")
writeFiles(t, local, map[string]string{
"proto/acme/grocerystore/v1/g.proto": "hamlet",
"proto/acme/grocerystore/v1/h.proto": "bethoven",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch2")
runInDir(t, runner, local, "git", "tag", "-m", "for testing", "branch/v2")
Expand Down Expand Up @@ -169,3 +177,10 @@ func runInDir(t *testing.T, runner command.Runner, dir string, cmd string, args
}
require.NoError(t, err)
}

// writeFiles writes each entry of files beneath dir. Map keys are file paths
// relative to dir and map values are the file contents. Any missing parent
// directories are created (mode 0700) before the file is written (mode 0600).
//
// Any failure aborts the calling test immediately via require.NoError.
func writeFiles(t *testing.T, dir string, files map[string]string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()
	// The loop variable is deliberately not called "path" so that it does not
	// shadow the imported path package.
	for relPath, contents := range files {
		require.NoError(t, os.MkdirAll(filepath.Join(dir, filepath.Dir(relPath)), 0700))
		require.NoError(t, os.WriteFile(filepath.Join(dir, relPath), []byte(contents), 0600))
	}
}
4 changes: 4 additions & 0 deletions private/pkg/git/hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
)

func TestParseHashFromHex(t *testing.T) {
t.Parallel()

const hex = "5edab9f970913225f985d9673ac19d61d36f0942"

id, err := parseHashFromHex(hex)
Expand All @@ -31,6 +33,8 @@ func TestParseHashFromHex(t *testing.T) {
}

func TestNewHashFromBytes(t *testing.T) {
t.Parallel()

bytes := []byte{0x5e, 0xda, 0xb9, 0xf9, 0x70, 0x91, 0x32, 0x25, 0xf9, 0x85, 0xd9, 0x67, 0x3a, 0xc1, 0x9d, 0x61, 0xd3, 0x6f, 0x9, 0x42}

id, err := newHashFromBytes(bytes)
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/ident_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
)

func TestParseIdent(t *testing.T) {
t.Parallel()

ident, err := parseIdent([]byte("Foo <bar@baz> 1680571785 +0445"))

require.NoError(t, err)
Expand Down
27 changes: 23 additions & 4 deletions private/pkg/git/object_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ import (
"go.uber.org/multierr"
)

// Object type names passed as the first argument to objectReader.read by the
// Blob, Commit, Tree and Tag methods.
const (
objectTypeBlob = "blob"
objectTypeCommit = "commit"
objectTypeTree = "tree"
objectTypeTag = "tag"
)

// exitTime is the amount of time we'll wait for git-cat-file(1) to exit.
var exitTime = 5 * time.Second
var errObjectTypeMismatch = errors.New("object type mismatch")
Expand Down Expand Up @@ -82,16 +89,28 @@ func (o *objectReader) Close() error {
)
}

func (o *objectReader) Commit(id Hash) (Commit, error) {
data, err := o.read("commit", id)
// Blob reads the object identified by hash as a blob and returns the bytes
// produced by o.read without further parsing.
func (o *objectReader) Blob(hash Hash) ([]byte, error) {
return o.read(objectTypeBlob, hash)
}

// Commit reads the object identified by hash as a commit and parses the
// resulting bytes with parseCommit. A read failure is returned unchanged.
func (o *objectReader) Commit(hash Hash) (Commit, error) {
	raw, readErr := o.read(objectTypeCommit, hash)
	if readErr != nil {
		return nil, readErr
	}
	return parseCommit(hash, raw)
}

// Tree reads the object identified by hash as a tree and parses the resulting
// bytes with parseTree. A read or parse failure is returned unchanged.
func (o *objectReader) Tree(hash Hash) (Tree, error) {
	data, err := o.read(objectTypeTree, hash)
	if err != nil {
		return nil, err
	}
	parsed, err := parseTree(hash, data)
	if err != nil {
		// Return an untyped nil: passing parseTree's nil *tree through the
		// Tree interface would produce a non-nil interface value.
		return nil, err
	}
	return parsed, nil
}

func (o *objectReader) Tag(hash Hash) (AnnotatedTag, error) {
data, err := o.read("tag", hash)
data, err := o.read(objectTypeTag, hash)
if err != nil {
return nil, err
}
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/tag_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
)

func TestTags(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var tags []string
err := repo.TagIterator.ForEachTag(func(tag string, commitHash git.Hash) error {
Expand Down
115 changes: 115 additions & 0 deletions private/pkg/git/tree.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright 2020-2023 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package git

import (
"bytes"
"errors"
"fmt"

"github.com/bufbuild/buf/private/pkg/normalpath"
)

// tree is the value built by parseTree from the raw data of a tree object.
type tree struct {
// hash is the hash that was passed to parseTree for this tree.
hash Hash
// nodes are the parsed entries, in the order they appeared in the data.
nodes []TreeNode
}

// parseTree parses the raw contents of a tree object into a tree.
//
// data is a sequence of nodes, each in the format
//
//	<mode><space><name>\0<hash>
//
// where <hash> is exactly hashLength bytes. Each node is handed to
// parseTreeNode. The given hash is recorded as the tree's own hash; it is not
// verified against data.
//
// An error is returned if a node has no NUL separator, if the data ends before
// a complete hash has been read, or if parseTreeNode rejects a node. Empty data
// yields a tree with no nodes.
func parseTree(hash Hash, data []byte) (*tree, error) {
	t := &tree{
		hash: hash,
	}
	for len(data) > 0 {
		// The NUL byte terminates <name>; the node ends hashLength bytes
		// after it.
		i := bytes.IndexByte(data, 0)
		if i == -1 {
			return nil, errors.New("parse tree: node is missing a NUL separator")
		}
		length := i + 1 + hashLength
		if length > len(data) {
			// Guard the slice below: truncated data must produce an error,
			// not an out-of-range panic.
			return nil, errors.New("parse tree: truncated node hash")
		}
		node, err := parseTreeNode(data[:length])
		if err != nil {
			return nil, fmt.Errorf("parse tree: %w", err)
		}
		t.nodes = append(t.nodes, node)
		data = data[length:]
	}
	return t, nil
}

// Hash returns the hash this tree was parsed with.
func (t *tree) Hash() Hash {
return t.hash
}

// Nodes returns the nodes of this tree. The returned slice is the tree's own
// slice, not a copy.
func (t *tree) Nodes() []TreeNode {
return t.nodes
}

// Descendant resolves path relative to this tree and returns the node it
// names. The path is split with normalpath.Components and each component is
// looked up in turn, reading intermediate trees through objectReader.
//
// An empty path is rejected with an error. ErrTreeNodeNotFound is returned
// when the path does not resolve to a node.
func (t *tree) Descendant(path string, objectReader ObjectReader) (TreeNode, error) {
	if len(path) == 0 {
		return nil, errors.New("empty path")
	}
	components := normalpath.Components(path)
	return descendant(objectReader, t, components)
}

// descendant walks from root through the nodes named by names, one tree level
// per name, and returns the node matching the final name.
//
// ErrTreeNodeNotFound is returned when a name is absent from the tree being
// searched, or when a non-final name refers to a node that is not a directory.
// Errors from objectReader.Tree are returned unchanged. names must contain at
// least one element.
func descendant(
	objectReader ObjectReader,
	root Tree,
	names []string,
) (TreeNode, error) {
	current := root
	for {
		// Peel off the name to look up at this level; the remainder names
		// the nodes below it.
		name, remaining := names[0], names[1:]
		var match TreeNode
		for _, candidate := range current.Nodes() {
			if candidate.Name() == name {
				match = candidate
				break
			}
		}
		switch {
		case match == nil:
			// Nothing at this level carries the requested name.
			return nil, ErrTreeNodeNotFound
		case len(remaining) == 0:
			// The last name has been matched: this is the terminal node.
			return match, nil
		case match.Mode() != ModeDir:
			// More names remain, so this node would have to be a directory
			// to descend into. It is not, so the path cannot exist.
			return nil, ErrTreeNodeNotFound
		}
		// TODO: support symlinks (on intermediate dirs) with descendant option
		// Load the subdirectory's tree and continue one level down.
		next, err := objectReader.Tree(match.Hash())
		if err != nil {
			return nil, err
		}
		current, names = next, remaining
	}
}

0 comments on commit 45c2edc

Please sign in to comment.