Skip to content

Commit e82f795

Browse files
committedAug 23, 2024··
Merge branch 'improvements'
2 parents 46cd1ae + dbb3576 commit e82f795

File tree

13 files changed

+190
-48
lines changed

13 files changed

+190
-48
lines changed
 

‎examples/log.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use clap::Parser;
88
use gix::{
99
bstr::{BString, ByteSlice},
1010
date::time::format,
11-
traverse::commit::simple::Sorting,
11+
revision::walk::Sorting,
1212
};
1313

1414
fn main() {

‎gitoxide-core/src/repository/commitgraph/list.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ pub(crate) mod function {
22
use std::{borrow::Cow, ffi::OsString};
33

44
use anyhow::{bail, Context};
5-
use gix::{prelude::ObjectIdExt, traverse::commit::simple::Sorting};
5+
use gix::{prelude::ObjectIdExt, revision::walk::Sorting};
66

77
use crate::OutputFormat;
88

‎gitoxide-core/src/repository/revision/list.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 0..=2;
1717

1818
pub(crate) mod function {
1919
use anyhow::{bail, Context};
20-
use gix::{hashtable::HashMap, traverse::commit::simple::Sorting, Progress};
20+
use gix::{hashtable::HashMap, revision::walk::Sorting, Progress};
2121
use layout::{
2222
backends::svg::SVGWriter,
2323
core::{base::Orientation, geometry::Point, style::StyleAttr},

‎gix-traverse/src/commit/topo/iter.rs

+12-6
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ where
180180

181181
fn collect_parents(&mut self, id: &oid) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
182182
collect_parents(
183-
self.commit_graph.as_ref(),
183+
&mut self.commit_graph,
184184
&self.find,
185185
id,
186186
matches!(self.parents, Parents::First),
@@ -193,7 +193,7 @@ where
193193
&mut self,
194194
id: &oid,
195195
) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
196-
collect_parents(self.commit_graph.as_ref(), &self.find, id, false, &mut self.buf)
196+
collect_parents(&mut self.commit_graph, &self.find, id, false, &mut self.buf)
197197
}
198198

199199
fn pop_commit(&mut self) -> Option<Result<Info, Error>> {
@@ -236,7 +236,7 @@ where
236236
}
237237

238238
fn collect_parents<Find>(
239-
cache: Option<&gix_commitgraph::Graph>,
239+
cache: &mut Option<gix_commitgraph::Graph>,
240240
f: Find,
241241
id: &oid,
242242
first_only: bool,
@@ -246,7 +246,7 @@ where
246246
Find: gix_object::Find,
247247
{
248248
let mut parents = SmallVec::<[(ObjectId, GenAndCommitTime); 1]>::new();
249-
match find(cache, &f, id, buf)? {
249+
match find(cache.as_ref(), &f, id, buf)? {
250250
Either::CommitRefIter(c) => {
251251
for token in c {
252252
use gix_object::commit::ref_iter::Token as T;
@@ -265,15 +265,21 @@ where
265265
// Need to check the cache again. That a commit is not in the cache
266266
// doesn't mean a parent is not.
267267
for (id, gen_time) in parents.iter_mut() {
268-
let commit = find(cache, &f, id, buf)?;
268+
let commit = find(cache.as_ref(), &f, id, buf)?;
269269
*gen_time = gen_and_commit_time(commit)?;
270270
}
271271
}
272272
Either::CachedCommit(c) => {
273273
for pos in c.iter_parents() {
274+
let Ok(pos) = pos else {
275+
// drop corrupt cache and use ODB from now on.
276+
*cache = None;
277+
return collect_parents(cache, f, id, first_only, buf);
278+
};
274279
let parent_commit = cache
280+
.as_ref()
275281
.expect("cache exists if CachedCommit was returned")
276-
.commit_at(pos?);
282+
.commit_at(pos);
277283
parents.push((
278284
parent_commit.id().into(),
279285
(parent_commit.generation(), parent_commit.committer_timestamp() as i64),

‎gix-traverse/src/commit/topo/mod.rs

-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ pub enum Error {
1111
#[error("Internal state (bitflags) not found")]
1212
MissingStateUnexpected,
1313
#[error(transparent)]
14-
CommitGraphFile(#[from] gix_commitgraph::file::commit::Error),
15-
#[error(transparent)]
1614
ObjectDecode(#[from] gix_object::decode::Error),
1715
#[error(transparent)]
1816
Find(#[from] gix_object::find::existing_iter::Error),

‎gix/src/repository/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ mod object;
6767
mod pathspec;
6868
mod reference;
6969
mod remote;
70-
#[cfg(feature = "revision")]
7170
mod revision;
7271
mod shallow;
7372
mod state;

‎gix/src/repository/revision.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::{bstr::BStr, revision, Id};
1+
use crate::revision;
2+
#[cfg(feature = "revision")]
3+
use crate::{bstr::BStr, Id};
24

35
/// Methods for resolving revisions by spec or working with the commit graph.
46
impl crate::Repository {
@@ -9,6 +11,7 @@ impl crate::Repository {
911
/// - `@` actually stands for `HEAD`, whereas `git` resolves it to the object pointed to by `HEAD` without making the
1012
/// `HEAD` ref available for lookups.
1113
#[doc(alias = "revparse", alias = "git2")]
14+
#[cfg(feature = "revision")]
1215
pub fn rev_parse<'a>(&self, spec: impl Into<&'a BStr>) -> Result<revision::Spec<'_>, revision::spec::parse::Error> {
1316
revision::Spec::from_bstr(
1417
spec,
@@ -22,6 +25,7 @@ impl crate::Repository {
2225

2326
/// Parse a revision specification and return single object id as represented by this instance.
2427
#[doc(alias = "revparse_single", alias = "git2")]
28+
#[cfg(feature = "revision")]
2529
pub fn rev_parse_single<'repo, 'a>(
2630
&'repo self,
2731
spec: impl Into<&'a BStr>,

‎gix/src/revision/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pub use gix_revision as plumbing;
88
///
99
#[allow(clippy::empty_docs)]
1010
pub mod walk;
11-
pub use walk::iter::Walk;
11+
pub use walk::iter_impl::Walk;
1212

1313
///
1414
#[cfg(feature = "revision")]

‎gix/src/revision/spec/parse/delegate/navigate.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
192192
match oid
193193
.attach(repo)
194194
.ancestors()
195-
.sorting(gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst)
195+
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
196196
.all()
197197
{
198198
Ok(iter) => {
@@ -245,7 +245,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
245245
.filter(|r| r.id().header().ok().map_or(false, |obj| obj.kind().is_commit()))
246246
.filter_map(|r| r.detach().peeled),
247247
)
248-
.sorting(gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst)
248+
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
249249
.all()
250250
{
251251
Ok(iter) => {

‎gix/src/revision/spec/parse/types.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ pub enum Error {
189189
next: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
190190
},
191191
#[error(transparent)]
192-
Traverse(#[from] gix_traverse::commit::simple::Error),
192+
Traverse(#[from] crate::revision::walk::iter::Error),
193193
#[error(transparent)]
194194
Walk(#[from] crate::revision::walk::Error),
195195
#[error("Spec does not contain a single object id")]

‎gix/src/revision/walk.rs

+120-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use gix_object::FindExt;
33

44
use crate::{ext::ObjectIdExt, revision, Repository};
55

6-
/// The error returned by [`Platform::all()`].
6+
/// The error returned by [`Platform::all()`] and [`Platform::selected()`].
77
#[derive(Debug, thiserror::Error)]
88
#[allow(missing_docs)]
99
pub enum Error {
@@ -15,6 +15,65 @@ pub enum Error {
1515
ConfigBoolean(#[from] crate::config::boolean::Error),
1616
}
1717

18+
/// Specify how to sort commits during a [revision::Walk] traversal.
19+
///
20+
/// ### Sample History
21+
///
22+
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
23+
/// (*their X-alignment doesn't matter*).
24+
///
25+
/// ```text
26+
/// ---1----2----4----7 <- second parent of 8
27+
/// \ \
28+
/// 3----5----6----8---
29+
/// ```
30+
#[derive(Default, Debug, Copy, Clone)]
31+
pub enum Sorting {
32+
/// Commits are sorted as they are mentioned in the commit graph.
33+
///
34+
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
35+
///
36+
/// ### Note
37+
///
38+
/// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
39+
/// as it avoids overlapping branches.
40+
#[default]
41+
BreadthFirst,
42+
/// Commits are sorted by their commit time in descending order, that is newest first.
43+
///
44+
/// The sorting applies to all currently queued commit ids and thus is full.
45+
///
46+
/// In the *sample history* the order would be `8, 7, 6, 4, 5, 2, 3, 1`
47+
///
48+
/// # Performance
49+
///
50+
/// This mode benefits greatly from having an [object cache](crate::Repository::object_cache_size) configured
51+
/// to avoid having to look up each commit twice.
52+
ByCommitTimeNewestFirst,
53+
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
54+
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
55+
///
56+
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
57+
///
58+
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
59+
ByCommitTimeNewestFirstCutoffOlderThan {
60+
/// The amount of seconds since unix epoch to use as cut-off time.
61+
seconds: gix_date::SecondsSinceUnixEpoch,
62+
},
63+
}
64+
65+
impl Sorting {
66+
fn into_simple(self) -> Option<gix_traverse::commit::simple::Sorting> {
67+
Some(match self {
68+
Sorting::BreadthFirst => gix_traverse::commit::simple::Sorting::BreadthFirst,
69+
Sorting::ByCommitTimeNewestFirst => gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst,
70+
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
71+
gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds }
72+
}
73+
})
74+
}
75+
}
76+
1877
/// Information about a commit that we obtained naturally as part of the iteration.
1978
#[derive(Debug, Clone)]
2079
pub struct Info<'repo> {
@@ -91,7 +150,8 @@ impl<'repo> Info<'repo> {
91150
pub struct Platform<'repo> {
92151
pub(crate) repo: &'repo Repository,
93152
pub(crate) tips: Vec<ObjectId>,
94-
pub(crate) sorting: gix_traverse::commit::simple::Sorting,
153+
pub(crate) prune: Vec<ObjectId>,
154+
pub(crate) sorting: Sorting,
95155
pub(crate) parents: gix_traverse::commit::Parents,
96156
pub(crate) use_commit_graph: Option<bool>,
97157
pub(crate) commit_graph: Option<gix_commitgraph::Graph>,
@@ -106,14 +166,15 @@ impl<'repo> Platform<'repo> {
106166
parents: Default::default(),
107167
use_commit_graph: None,
108168
commit_graph: None,
169+
prune: Vec::new(),
109170
}
110171
}
111172
}
112173

113174
/// Create-time builder methods
114175
impl<'repo> Platform<'repo> {
115176
/// Set the sort mode for commits to the given value. The default is to order topologically breadth-first.
116-
pub fn sorting(mut self, sorting: gix_traverse::commit::simple::Sorting) -> Self {
177+
pub fn sorting(mut self, sorting: Sorting) -> Self {
117178
self.sorting = sorting;
118179
self
119180
}
@@ -143,6 +204,37 @@ impl<'repo> Platform<'repo> {
143204
self.commit_graph = graph;
144205
self
145206
}
207+
208+
/// Prune the commit with the given `ids` such that they won't be returned, and such that none of their ancestors is returned either.
209+
///
210+
/// Note that this forces the [sorting](Self::sorting) to
211+
/// [`ByCommitTimeNewestFirstCutoffOlderThan`](Sorting::ByCommitTimeNewestFirstCutoffOlderThan) configured with
212+
/// the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either.
213+
///
214+
/// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date.
215+
#[doc(alias = "hide", alias = "git2")]
216+
pub fn with_pruned(mut self, ids: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
217+
let mut cutoff = match self.sorting {
218+
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(seconds),
219+
Sorting::BreadthFirst | Sorting::ByCommitTimeNewestFirst => None,
220+
};
221+
for id in ids.into_iter() {
222+
let id = id.into();
223+
if !self.prune.contains(&id) {
224+
if let Some(time) = self.repo.find_commit(id).ok().and_then(|c| c.time().ok()) {
225+
if cutoff.is_none() || cutoff > Some(time.seconds) {
226+
cutoff = time.seconds.into();
227+
}
228+
}
229+
self.prune.push(id);
230+
}
231+
}
232+
233+
if let Some(cutoff) = cutoff {
234+
self.sorting = Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: cutoff }
235+
}
236+
self
237+
}
146238
}
147239

148240
/// Produce the iterator
@@ -162,7 +254,9 @@ impl<'repo> Platform<'repo> {
162254
parents,
163255
use_commit_graph,
164256
commit_graph,
257+
mut prune,
165258
} = self;
259+
prune.sort();
166260
Ok(revision::Walk {
167261
repo,
168262
inner: Box::new(
@@ -176,9 +270,12 @@ impl<'repo> Platform<'repo> {
176270
if !filter(id) {
177271
return false;
178272
}
273+
let id = id.to_owned();
274+
if prune.binary_search(&id).is_ok() {
275+
return false;
276+
}
179277
match shallow_commits.as_ref() {
180278
Some(commits) => {
181-
let id = id.to_owned();
182279
if let Ok(idx) = grafted_parents_to_skip.binary_search(&id) {
183280
grafted_parents_to_skip.remove(idx);
184281
return false;
@@ -195,38 +292,50 @@ impl<'repo> Platform<'repo> {
195292
}
196293
}
197294
})
198-
.sorting(sorting)?
295+
.sorting(sorting.into_simple().expect("for now there is nothing else"))?
199296
.parents(parents)
200297
.commit_graph(
201298
commit_graph.or(use_commit_graph
202299
.map_or_else(|| self.repo.config.may_use_commit_graph(), Ok)?
203300
.then(|| self.repo.commit_graph().ok())
204301
.flatten()),
205-
),
302+
)
303+
.map(|res| res.map_err(iter::Error::from)),
206304
),
207305
})
208306
}
209307
/// Return an iterator to traverse all commits reachable as configured by the [Platform].
210308
///
211309
/// # Performance
212310
///
213-
/// It's highly recommended to set an [`object cache`][Repository::object_cache_size()] on the parent repo
311+
/// It's highly recommended to set an [`object cache`](Repository::object_cache_size()) on the parent repo
214312
/// to greatly speed up performance if the returned id is supposed to be looked up right after.
215313
pub fn all(self) -> Result<revision::Walk<'repo>, Error> {
216314
self.selected(|_| true)
217315
}
218316
}
219317

220-
pub(crate) mod iter {
318+
///
319+
#[allow(clippy::empty_docs)]
320+
pub mod iter {
321+
/// The error returned by the [Walk](crate::revision::Walk) iterator.
322+
#[derive(Debug, thiserror::Error)]
323+
#[allow(missing_docs)]
324+
pub enum Error {
325+
#[error(transparent)]
326+
SimpleTraversal(#[from] gix_traverse::commit::simple::Error),
327+
}
328+
}
329+
330+
pub(crate) mod iter_impl {
221331
/// The iterator returned by [`crate::revision::walk::Platform::all()`].
222332
pub struct Walk<'repo> {
223333
pub(crate) repo: &'repo crate::Repository,
224-
pub(crate) inner:
225-
Box<dyn Iterator<Item = Result<gix_traverse::commit::Info, gix_traverse::commit::simple::Error>> + 'repo>,
334+
pub(crate) inner: Box<dyn Iterator<Item = Result<gix_traverse::commit::Info, super::iter::Error>> + 'repo>,
226335
}
227336

228337
impl<'repo> Iterator for Walk<'repo> {
229-
type Item = Result<super::Info<'repo>, gix_traverse::commit::simple::Error>;
338+
type Item = Result<super::Info<'repo>, super::iter::Error>;
230339

231340
fn next(&mut self) -> Option<Self::Item> {
232341
self.inner

‎gix/tests/id/mod.rs

+27-7
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ fn display_and_debug() -> crate::Result {
6767
}
6868

6969
mod ancestors {
70-
use gix_traverse::commit;
71-
7270
use crate::id::hex_to_id;
7371

7472
#[test]
@@ -87,7 +85,7 @@ mod ancestors {
8785
let commits_by_commit_date = head
8886
.ancestors()
8987
.use_commit_graph(!use_commit_graph)
90-
.sorting(commit::simple::Sorting::ByCommitTimeNewestFirst)
88+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
9189
.all()?
9290
.map(|c| c.map(gix::revision::walk::Info::detach))
9391
.collect::<Result<Vec<_>, _>>()?;
@@ -121,7 +119,7 @@ mod ancestors {
121119
let head = repo.head()?.into_peeled_id()?;
122120
let commits = head
123121
.ancestors()
124-
.sorting(commit::simple::Sorting::ByCommitTimeNewestFirst) // assure we have time set
122+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst) // assure we have time set
125123
.use_commit_graph(use_commit_graph)
126124
.all()?
127125
.collect::<Result<Vec<_>, _>>()?;
@@ -134,16 +132,38 @@ mod ancestors {
134132
Ok(())
135133
}
136134

135+
#[test]
136+
fn prune_with_auto_cutoff() -> crate::Result {
137+
let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local();
138+
let head = repo.head()?.into_peeled_id()?;
139+
140+
for use_commit_graph in [false, true] {
141+
let commits_graph_order = head
142+
.ancestors()
143+
.with_pruned(Some(hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac")))
144+
.use_commit_graph(use_commit_graph)
145+
.all()?
146+
.map(|c| c.map(|c| c.id))
147+
.collect::<Result<Vec<_>, _>>()?;
148+
assert_eq!(
149+
commits_graph_order,
150+
&[hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170")],
151+
"we ignore all but the first, and the cutoff takes care of that"
152+
);
153+
}
154+
Ok(())
155+
}
156+
137157
#[test]
138158
fn filtered() -> crate::Result {
139159
let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local();
140160
let head = repo.head()?.into_peeled_id()?;
141161

142162
for use_commit_graph in [false, true] {
143163
for sorting in [
144-
commit::simple::Sorting::BreadthFirst,
145-
commit::simple::Sorting::ByCommitTimeNewestFirst,
146-
commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
164+
gix::revision::walk::Sorting::BreadthFirst,
165+
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
166+
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
147167
] {
148168
let commits_graph_order = head
149169
.ancestors()

‎gix/tests/repository/shallow.rs

+19-13
Original file line numberDiff line numberDiff line change
@@ -44,25 +44,31 @@ fn yes() -> crate::Result {
4444
}
4545

4646
mod traverse {
47-
use gix_traverse::commit::simple::Sorting;
4847
use serial_test::parallel;
4948

5049
use crate::util::{hex_to_id, named_subrepo_opts};
5150

5251
#[test]
5352
#[parallel]
5453
fn boundary_is_detected_triggering_no_error() -> crate::Result {
55-
for toggle in [false, true] {
56-
for name in ["shallow.git", "shallow"] {
57-
let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?;
58-
let commits: Vec<_> = repo
59-
.head_id()?
60-
.ancestors()
61-
.use_commit_graph(toggle)
62-
.all()?
63-
.map(|c| c.map(|c| c.id))
64-
.collect::<Result<_, _>>()?;
65-
assert_eq!(commits, [hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12")]);
54+
for sorting in [
55+
gix::revision::walk::Sorting::BreadthFirst,
56+
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
57+
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
58+
] {
59+
for toggle in [false, true] {
60+
for name in ["shallow.git", "shallow"] {
61+
let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?;
62+
let commits: Vec<_> = repo
63+
.head_id()?
64+
.ancestors()
65+
.use_commit_graph(toggle)
66+
.sorting(sorting)
67+
.all()?
68+
.map(|c| c.map(|c| c.id))
69+
.collect::<Result<_, _>>()?;
70+
assert_eq!(commits, [hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12")]);
71+
}
6672
}
6773
}
6874
Ok(())
@@ -91,7 +97,7 @@ mod traverse {
9197
.head_id()?
9298
.ancestors()
9399
.use_commit_graph(toggle)
94-
.sorting(Sorting::ByCommitTimeNewestFirst)
100+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
95101
.all()?
96102
.map(|c| c.map(|c| c.id))
97103
.collect::<Result<_, _>>()?;

0 commit comments

Comments
 (0)
Please sign in to comment.