Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Git-backed storage.ReadBucket via storagegit #2114

Merged
merged 18 commits into from
May 25, 2023
Merged
50 changes: 50 additions & 0 deletions private/pkg/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,12 @@ type Commit interface {

// ObjectReader reads objects (commits, trees, blobs) from a `.git` directory.
//
// Each lookup method takes the Hash of the object to read and returns a
// non-nil error when the object could not be read.
type ObjectReader interface {
// Blob reads the blob identified by the hash and returns its data.
Blob(id Hash) ([]byte, error)
// Commit reads the commit identified by the hash.
Commit(id Hash) (Commit, error)
// Tree reads the tree identified by the hash.
Tree(id Hash) (Tree, error)
// Close closes the reader.
Close() error
}
Expand All @@ -268,3 +272,49 @@ func OpenObjectReader(
) (ObjectReader, error) {
return newObjectReader(gitDirPath, runner)
}

// The Mode* constants are the FileMode values a tree entry can carry, written
// as octal literals.
const (
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
// ModeUnknown is a mode's zero value.
ModeUnknown FileMode = 0
// ModeFile is a blob that should be written as a plain file.
ModeFile FileMode = 010_0644
// ModeExe is a blob that should be written with the executable bit set.
ModeExe FileMode = 010_0755
// ModeDir is a tree to be unpacked as a subdirectory in the current
// directory.
ModeDir FileMode = 004_0000
// ModeSymlink is a blob with its content being the path linked to.
ModeSymlink FileMode = 012_0000
// ModeSubmodule is a commit that the submodule is checked out at.
ModeSubmodule FileMode = 016_0000
)

// FileMode is how to interpret a tree entry's object. See the Mode* constants
// for how to interpret each mode value.
//
// The underlying value is the numeric mode; the Mode* constants spell it out
// in octal.
type FileMode uint32

// ErrSubTreeNotFound is returned by Tree.Traverse when no entry exists at the
// requested name-path, including when an intermediate component of the path is
// not a directory.
var ErrSubTreeNotFound = errors.New("subtree not found")

// Tree is a git tree, which is a manifest of other git objects, including other trees.
type Tree interface {
// Hash is the Hash for this Tree.
Hash() Hash
// Entries is the set of entries in this Tree.
Entries() []TreeEntry
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
// Traverse walks down a tree, following the name-path specified,
// and returns the terminal TreeEntry. If no entry is found, it returns
// ErrSubTreeNotFound.
//
// Each element of names is one path component, and names is expected to
// contain at least one component. Sub-trees along the way are loaded through
// objectReader.
Traverse(objectReader ObjectReader, names ...string) (TreeEntry, error)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not Walk?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could go either way, but I felt that Walk was overloaded with storage's walk (i.e, it's not walking all paths in a subtree, it's navigating to a particular descendant). Maybe Descendant is better?

}

// TreeEntry is a reference to an object contained in a tree. These objects have
// a file mode associated with them, which hints at the type of object located
// at Hash (a tree, a blob, or, for submodules, a commit).
type TreeEntry interface {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
// Hash is the Hash of the object referenced by this TreeEntry.
Hash() Hash
// Name is the name of the object referenced by this TreeEntry.
Name() string
// Mode is the file mode of the object referenced by this TreeEntry.
Mode() FileMode
}
33 changes: 24 additions & 9 deletions private/pkg/git/gittest/gittest.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io"
"os"
"path"
"path/filepath"
"strings"
"testing"

Expand Down Expand Up @@ -103,28 +104,35 @@ func scaffoldGitRepository(t *testing.T, runner command.Runner) string {
runInDir(t, runner, local, "git", "remote", "add", "origin", remote)

// (1) commit in main branch
runInDir(t, runner, local, "touch", "randomBinary")
writeFiles(t, local, map[string]string{
"randomBinary": "some executable",
"proto/buf.yaml": "some buf.yaml",
"proto/acme/petstore/v1/a.proto": "cats",
"proto/acme/petstore/v1/b.proto": "animals",
"proto/acme/grocerystore/v1/c.proto": "toysrus",
"proto/acme/grocerystore/v1/d.proto": "petsrus",
})
runInDir(t, runner, local, "chmod", "+x", "randomBinary")
runInDir(t, runner, local, "mkdir", "proto")
runInDir(t, runner, path.Join(local, "proto"), "touch", "buf.yaml")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/petstore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "a.proto", "b.proto")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/grocerystore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "c.proto", "d.proto")
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "initial commit")
runInDir(t, runner, local, "git", "push", "-u", "-f", "origin", DefaultBranch)

// (2) branch off main and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "e.proto", "f.proto")
writeFiles(t, local, map[string]string{
"proto/acme/petstore/v1/e.proto": "loblaws",
"proto/acme/petstore/v1/f.proto": "merchant of venice",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch1")
runInDir(t, runner, local, "git", "push", "origin", "smian/branch1")

// (3) branch off branch and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch2")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "g.proto", "h.proto")
writeFiles(t, local, map[string]string{
"proto/acme/grocerystore/v1/g.proto": "hamlet",
"proto/acme/grocerystore/v1/h.proto": "bethoven",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch2")
runInDir(t, runner, local, "git", "push", "origin", "smian/branch2")
Expand Down Expand Up @@ -160,3 +168,10 @@ func runInDir(t *testing.T, runner command.Runner, dir string, cmd string, args
}
require.NoError(t, err)
}

// writeFiles writes every entry of files beneath dir, creating any missing
// parent directories first. Map keys are file paths relative to dir and map
// values are the file contents. Directories are created with mode 0700 and
// files with mode 0600; any failure is reported through require.NoError.
func writeFiles(t *testing.T, dir string, files map[string]string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()
	// The loop variable is named relPath (not path) so that it does not shadow
	// the imported "path" package.
	for relPath, contents := range files {
		require.NoError(t, os.MkdirAll(filepath.Join(dir, filepath.Dir(relPath)), 0700))
		require.NoError(t, os.WriteFile(filepath.Join(dir, relPath), []byte(contents), 0600))
	}
}
18 changes: 15 additions & 3 deletions private/pkg/git/object_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,24 @@ func (o *objectReader) Close() error {
)
}

func (o *objectReader) Commit(id Hash) (Commit, error) {
data, err := o.read("commit", id)
func (o *objectReader) Blob(hash Hash) ([]byte, error) {
return o.read("blob", hash)
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}

// Commit reads the object identified by hash as a "commit" and hands the bytes
// to parseCommit. A failed read is returned unchanged, with a nil Commit.
func (o *objectReader) Commit(hash Hash) (Commit, error) {
	payload, readErr := o.read("commit", hash)
	if readErr != nil {
		return nil, readErr
	}
	return parseCommit(hash, payload)
}

// Tree reads the object identified by hash as a "tree" and parses the bytes
// with parseTree.
//
// A failed read or a failed parse is returned as the error, and in both cases
// the returned Tree is a true nil interface value (never a nil *tree wrapped in
// a non-nil interface).
func (o *objectReader) Tree(hash Hash) (Tree, error) {
	data, err := o.read("tree", hash)
	if err != nil {
		return nil, err
	}
	parsed, err := parseTree(hash, data)
	if err != nil {
		// Return an untyped nil: forwarding parseTree's nil *tree directly
		// would produce a Tree that compares unequal to nil.
		return nil, err
	}
	return parsed, nil
}

func (o *objectReader) read(objectType string, id Hash) ([]byte, error) {
Expand Down
93 changes: 93 additions & 0 deletions private/pkg/git/tree.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2020-2023 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package git

import (
"bytes"
"errors"
"fmt"
)

// tree is the in-memory form of a git tree object: the hash it was read under
// plus the entries it lists.
type tree struct {
	hash    Hash
	entries []TreeEntry
}

// Hash returns the hash this tree was created with.
func (tr *tree) Hash() Hash {
	return tr.hash
}

// Entries returns the entries held by this tree. The returned slice is the
// tree's own slice, not a copy.
func (tr *tree) Entries() []TreeEntry {
	return tr.entries
}

// Traverse resolves the given name-path starting at this tree by delegating to
// traverse, loading sub-trees through objectReader.
func (tr *tree) Traverse(objectReader ObjectReader, names ...string) (TreeEntry, error) {
	return traverse(objectReader, tr, names...)
}

func parseTree(hash Hash, data []byte) (*tree, error) {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
t := &tree{
hash: hash,
}
for len(data) > 0 {
i := bytes.Index(data, []byte{0})
if i == -1 {
return nil, errors.New("malformed tree")
}
length := i + 1 + hashLength
entry, err := parseTreeEntry(data[:length])
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, fmt.Errorf("malformed tree: %w", err)
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}
t.entries = append(t.entries, entry)
data = data[length:]
}
return t, nil
}

func traverse(
objectReader ObjectReader,
root Tree,
names ...string,
) (TreeEntry, error) {
name := names[0]
names = names[1:]
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
// Find name in this tree.
var found TreeEntry
for _, entry := range root.Entries() {
if entry.Name() == name {
found = entry
break
}
}
if found == nil {
// No name in this tree.
return nil, ErrSubTreeNotFound
}
if len(names) == 0 {
// We found it.
return found, nil
}
if found.Mode() != ModeDir {
// Part of the path is not a directory.
return nil, ErrSubTreeNotFound
}
// Walk down the tree.
tree, err := objectReader.Tree(found.Hash())
if err != nil {
return nil, err
}
return traverse(objectReader, tree, names...)
}
82 changes: 82 additions & 0 deletions private/pkg/git/tree_entry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2020-2023 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package git

import (
"bytes"
"errors"
"fmt"
"strconv"
)

// treeEntry is one parsed entry of a git tree: the entry's name, its file
// mode, and the hash of the object it refers to.
type treeEntry struct {
	name string
	mode FileMode
	hash Hash
}

// Hash returns the hash of the object this entry refers to.
func (te *treeEntry) Hash() Hash {
	return te.hash
}

// Name returns the entry's name.
func (te *treeEntry) Name() string {
	return te.name
}

// Mode returns the entry's file mode.
func (te *treeEntry) Mode() FileMode {
	return te.mode
}

func parseTreeEntry(data []byte) (*treeEntry, error) {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
modeAndName, hash, found := bytes.Cut(data, []byte{0})
if !found {
return nil, errors.New("malformed entry")
}
parsedHash, err := newHashFromBytes(hash)
if err != nil {
return nil, fmt.Errorf("malformed git tree entry: %w", err)
}
mode, name, found := bytes.Cut(modeAndName, []byte{' '})
if !found {
return nil, errors.New("malformed entry")
}
parsedFileMode, err := parseFileMode(mode)
if err != nil {
return nil, fmt.Errorf("malformed git tree entry: %w", err)
}
return &treeEntry{
hash: parsedHash,
name: string(name),
mode: parsedFileMode,
}, nil
}

// decodes the octal form of a file mode into one of the valid Mode* values.
func parseFileMode(data []byte) (FileMode, error) {
mode, err := strconv.ParseUint(string(data), 8, 32)
if err != nil {
return 0, err
}
switch FileMode(mode) {
case ModeFile:
case ModeExe:
case ModeDir:
case ModeSymlink:
case ModeSubmodule:
default:
return 0, fmt.Errorf("unknown file mode: %o", mode)
}
return FileMode(mode), nil
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
}