Skip to content

Commit d63ec06

Browse files
committedAug 23, 2024
feat!: Do not let revision::walk::Platform rely on plumbing crate types.
This is a step towards a more stable API, but also, will allow using different implementations. Notably, this replaces `gix_traverse::commit::simple::Sorting` with `gix::revision::walk::Sorting`.
1 parent 5ef8ae8 commit d63ec06

File tree

6 files changed

+170
-35
lines changed

6 files changed

+170
-35
lines changed
 

‎gix/src/revision/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pub use gix_revision as plumbing;
88
///
99
#[allow(clippy::empty_docs)]
1010
pub mod walk;
11-
pub use walk::iter::Walk;
11+
pub use walk::iter_impl::Walk;
1212

1313
///
1414
#[cfg(feature = "revision")]

‎gix/src/revision/spec/parse/delegate/navigate.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
192192
match oid
193193
.attach(repo)
194194
.ancestors()
195-
.sorting(gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst)
195+
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
196196
.all()
197197
{
198198
Ok(iter) => {
@@ -245,7 +245,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
245245
.filter(|r| r.id().header().ok().map_or(false, |obj| obj.kind().is_commit()))
246246
.filter_map(|r| r.detach().peeled),
247247
)
248-
.sorting(gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst)
248+
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
249249
.all()
250250
{
251251
Ok(iter) => {

‎gix/src/revision/spec/parse/types.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ pub enum Error {
189189
next: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
190190
},
191191
#[error(transparent)]
192-
Traverse(#[from] gix_traverse::commit::simple::Error),
192+
Traverse(#[from] crate::revision::walk::iter::Error),
193193
#[error(transparent)]
194194
Walk(#[from] crate::revision::walk::Error),
195195
#[error("Spec does not contain a single object id")]

‎gix/src/revision/walk.rs

+120-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use gix_object::FindExt;
33

44
use crate::{ext::ObjectIdExt, revision, Repository};
55

6-
/// The error returned by [`Platform::all()`].
6+
/// The error returned by [`Platform::all()`] and [`Platform::selected()`].
77
#[derive(Debug, thiserror::Error)]
88
#[allow(missing_docs)]
99
pub enum Error {
@@ -15,6 +15,65 @@ pub enum Error {
1515
ConfigBoolean(#[from] crate::config::boolean::Error),
1616
}
1717

18+
/// Specify how to sort commits during a [revision::Walk] traversal.
19+
///
20+
/// ### Sample History
21+
///
22+
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
23+
/// (*their X-alignment doesn't matter*).
24+
///
25+
/// ```text
26+
/// ---1----2----4----7 <- second parent of 8
27+
/// \ \
28+
/// 3----5----6----8---
29+
/// ```
30+
#[derive(Default, Debug, Copy, Clone)]
31+
pub enum Sorting {
32+
/// Commits are sorted as they are mentioned in the commit graph.
33+
///
34+
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
35+
///
36+
/// ### Note
37+
///
38+
/// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
39+
/// as it avoids overlapping branches.
40+
#[default]
41+
BreadthFirst,
42+
/// Commits are sorted by their commit time in descending order, that is newest first.
43+
///
44+
/// The sorting applies to all currently queued commit ids and thus is full.
45+
///
46+
/// In the *sample history* the order would be `8, 7, 6, 4, 5, 2, 3, 1`
47+
///
48+
/// # Performance
49+
///
50+
/// This mode benefits greatly from having an [object cache](crate::Repository::object_cache_size) configured
51+
/// to avoid having to look up each commit twice.
52+
ByCommitTimeNewestFirst,
53+
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
54+
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
55+
///
56+
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
57+
///
58+
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
59+
ByCommitTimeNewestFirstCutoffOlderThan {
60+
/// The amount of seconds since unix epoch to use as cut-off time.
61+
seconds: gix_date::SecondsSinceUnixEpoch,
62+
},
63+
}
64+
65+
impl Sorting {
66+
fn into_simple(self) -> Option<gix_traverse::commit::simple::Sorting> {
67+
Some(match self {
68+
Sorting::BreadthFirst => gix_traverse::commit::simple::Sorting::BreadthFirst,
69+
Sorting::ByCommitTimeNewestFirst => gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst,
70+
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
71+
gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds }
72+
}
73+
})
74+
}
75+
}
76+
1877
/// Information about a commit that we obtained naturally as part of the iteration.
1978
#[derive(Debug, Clone)]
2079
pub struct Info<'repo> {
@@ -91,7 +150,8 @@ impl<'repo> Info<'repo> {
91150
pub struct Platform<'repo> {
92151
pub(crate) repo: &'repo Repository,
93152
pub(crate) tips: Vec<ObjectId>,
94-
pub(crate) sorting: gix_traverse::commit::simple::Sorting,
153+
pub(crate) prune: Vec<ObjectId>,
154+
pub(crate) sorting: Sorting,
95155
pub(crate) parents: gix_traverse::commit::Parents,
96156
pub(crate) use_commit_graph: Option<bool>,
97157
pub(crate) commit_graph: Option<gix_commitgraph::Graph>,
@@ -106,14 +166,15 @@ impl<'repo> Platform<'repo> {
106166
parents: Default::default(),
107167
use_commit_graph: None,
108168
commit_graph: None,
169+
prune: Vec::new(),
109170
}
110171
}
111172
}
112173

113174
/// Create-time builder methods
114175
impl<'repo> Platform<'repo> {
115176
/// Set the sort mode for commits to the given value. The default is to order topologically breadth-first.
116-
pub fn sorting(mut self, sorting: gix_traverse::commit::simple::Sorting) -> Self {
177+
pub fn sorting(mut self, sorting: Sorting) -> Self {
117178
self.sorting = sorting;
118179
self
119180
}
@@ -143,6 +204,37 @@ impl<'repo> Platform<'repo> {
143204
self.commit_graph = graph;
144205
self
145206
}
207+
208+
/// Prune the commit with the given `ids` such that they won't be returned, and such that none of their ancestors is returned either.
209+
///
210+
/// Note that this forces the [sorting](Self::sorting) to
211+
/// [`ByCommitTimeNewestFirstCutoffOlderThan`](Sorting::ByCommitTimeNewestFirstCutoffOlderThan) configured with
212+
/// the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either.
213+
///
214+
/// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date.
215+
#[doc(alias = "hide", alias = "git2")]
216+
pub fn with_pruned(mut self, ids: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
217+
let mut cutoff = match self.sorting {
218+
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(seconds),
219+
Sorting::BreadthFirst | Sorting::ByCommitTimeNewestFirst => None,
220+
};
221+
for id in ids.into_iter() {
222+
let id = id.into();
223+
if !self.prune.contains(&id) {
224+
if let Some(time) = self.repo.find_commit(id).ok().and_then(|c| c.time().ok()) {
225+
if cutoff.is_none() || cutoff > Some(time.seconds) {
226+
cutoff = time.seconds.into();
227+
}
228+
}
229+
self.prune.push(id);
230+
}
231+
}
232+
233+
if let Some(cutoff) = cutoff {
234+
self.sorting = Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: cutoff }
235+
}
236+
self
237+
}
146238
}
147239

148240
/// Produce the iterator
@@ -162,7 +254,9 @@ impl<'repo> Platform<'repo> {
162254
parents,
163255
use_commit_graph,
164256
commit_graph,
257+
mut prune,
165258
} = self;
259+
prune.sort();
166260
Ok(revision::Walk {
167261
repo,
168262
inner: Box::new(
@@ -176,9 +270,12 @@ impl<'repo> Platform<'repo> {
176270
if !filter(id) {
177271
return false;
178272
}
273+
let id = id.to_owned();
274+
if prune.binary_search(&id).is_ok() {
275+
return false;
276+
}
179277
match shallow_commits.as_ref() {
180278
Some(commits) => {
181-
let id = id.to_owned();
182279
if let Ok(idx) = grafted_parents_to_skip.binary_search(&id) {
183280
grafted_parents_to_skip.remove(idx);
184281
return false;
@@ -195,38 +292,50 @@ impl<'repo> Platform<'repo> {
195292
}
196293
}
197294
})
198-
.sorting(sorting)?
295+
.sorting(sorting.into_simple().expect("for now there is nothing else"))?
199296
.parents(parents)
200297
.commit_graph(
201298
commit_graph.or(use_commit_graph
202299
.map_or_else(|| self.repo.config.may_use_commit_graph(), Ok)?
203300
.then(|| self.repo.commit_graph().ok())
204301
.flatten()),
205-
),
302+
)
303+
.map(|res| res.map_err(iter::Error::from)),
206304
),
207305
})
208306
}
209307
/// Return an iterator to traverse all commits reachable as configured by the [Platform].
210308
///
211309
/// # Performance
212310
///
213-
/// It's highly recommended to set an [`object cache`][Repository::object_cache_size()] on the parent repo
311+
/// It's highly recommended to set an [`object cache`](Repository::object_cache_size()) on the parent repo
214312
/// to greatly speed up performance if the returned id is supposed to be looked up right after.
215313
pub fn all(self) -> Result<revision::Walk<'repo>, Error> {
216314
self.selected(|_| true)
217315
}
218316
}
219317

220-
pub(crate) mod iter {
318+
///
319+
#[allow(clippy::empty_docs)]
320+
pub mod iter {
321+
/// The error returned by the [Walk](crate::revision::Walk) iterator.
322+
#[derive(Debug, thiserror::Error)]
323+
#[allow(missing_docs)]
324+
pub enum Error {
325+
#[error(transparent)]
326+
SimpleTraversal(#[from] gix_traverse::commit::simple::Error),
327+
}
328+
}
329+
330+
pub(crate) mod iter_impl {
221331
/// The iterator returned by [`crate::revision::walk::Platform::all()`].
222332
pub struct Walk<'repo> {
223333
pub(crate) repo: &'repo crate::Repository,
224-
pub(crate) inner:
225-
Box<dyn Iterator<Item = Result<gix_traverse::commit::Info, gix_traverse::commit::simple::Error>> + 'repo>,
334+
pub(crate) inner: Box<dyn Iterator<Item = Result<gix_traverse::commit::Info, super::iter::Error>> + 'repo>,
226335
}
227336

228337
impl<'repo> Iterator for Walk<'repo> {
229-
type Item = Result<super::Info<'repo>, gix_traverse::commit::simple::Error>;
338+
type Item = Result<super::Info<'repo>, super::iter::Error>;
230339

231340
fn next(&mut self) -> Option<Self::Item> {
232341
self.inner

‎gix/tests/id/mod.rs

+27-7
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ fn display_and_debug() -> crate::Result {
6767
}
6868

6969
mod ancestors {
70-
use gix_traverse::commit;
71-
7270
use crate::id::hex_to_id;
7371

7472
#[test]
@@ -87,7 +85,7 @@ mod ancestors {
8785
let commits_by_commit_date = head
8886
.ancestors()
8987
.use_commit_graph(!use_commit_graph)
90-
.sorting(commit::simple::Sorting::ByCommitTimeNewestFirst)
88+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
9189
.all()?
9290
.map(|c| c.map(gix::revision::walk::Info::detach))
9391
.collect::<Result<Vec<_>, _>>()?;
@@ -121,7 +119,7 @@ mod ancestors {
121119
let head = repo.head()?.into_peeled_id()?;
122120
let commits = head
123121
.ancestors()
124-
.sorting(commit::simple::Sorting::ByCommitTimeNewestFirst) // assure we have time set
122+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst) // assure we have time set
125123
.use_commit_graph(use_commit_graph)
126124
.all()?
127125
.collect::<Result<Vec<_>, _>>()?;
@@ -134,16 +132,38 @@ mod ancestors {
134132
Ok(())
135133
}
136134

135+
#[test]
136+
fn prune_with_auto_cutoff() -> crate::Result {
137+
let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local();
138+
let head = repo.head()?.into_peeled_id()?;
139+
140+
for use_commit_graph in [false, true] {
141+
let commits_graph_order = head
142+
.ancestors()
143+
.with_pruned(Some(hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac")))
144+
.use_commit_graph(use_commit_graph)
145+
.all()?
146+
.map(|c| c.map(|c| c.id))
147+
.collect::<Result<Vec<_>, _>>()?;
148+
assert_eq!(
149+
commits_graph_order,
150+
&[hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170")],
151+
"we ignore all but the first, and the cutoff takes care of that"
152+
);
153+
}
154+
Ok(())
155+
}
156+
137157
#[test]
138158
fn filtered() -> crate::Result {
139159
let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local();
140160
let head = repo.head()?.into_peeled_id()?;
141161

142162
for use_commit_graph in [false, true] {
143163
for sorting in [
144-
commit::simple::Sorting::BreadthFirst,
145-
commit::simple::Sorting::ByCommitTimeNewestFirst,
146-
commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
164+
gix::revision::walk::Sorting::BreadthFirst,
165+
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
166+
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
147167
] {
148168
let commits_graph_order = head
149169
.ancestors()

‎gix/tests/repository/shallow.rs

+19-13
Original file line numberDiff line numberDiff line change
@@ -44,25 +44,31 @@ fn yes() -> crate::Result {
4444
}
4545

4646
mod traverse {
47-
use gix_traverse::commit::simple::Sorting;
4847
use serial_test::parallel;
4948

5049
use crate::util::{hex_to_id, named_subrepo_opts};
5150

5251
#[test]
5352
#[parallel]
5453
fn boundary_is_detected_triggering_no_error() -> crate::Result {
55-
for toggle in [false, true] {
56-
for name in ["shallow.git", "shallow"] {
57-
let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?;
58-
let commits: Vec<_> = repo
59-
.head_id()?
60-
.ancestors()
61-
.use_commit_graph(toggle)
62-
.all()?
63-
.map(|c| c.map(|c| c.id))
64-
.collect::<Result<_, _>>()?;
65-
assert_eq!(commits, [hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12")]);
54+
for sorting in [
55+
gix::revision::walk::Sorting::BreadthFirst,
56+
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
57+
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
58+
] {
59+
for toggle in [false, true] {
60+
for name in ["shallow.git", "shallow"] {
61+
let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?;
62+
let commits: Vec<_> = repo
63+
.head_id()?
64+
.ancestors()
65+
.use_commit_graph(toggle)
66+
.sorting(sorting)
67+
.all()?
68+
.map(|c| c.map(|c| c.id))
69+
.collect::<Result<_, _>>()?;
70+
assert_eq!(commits, [hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12")]);
71+
}
6672
}
6773
}
6874
Ok(())
@@ -91,7 +97,7 @@ mod traverse {
9197
.head_id()?
9298
.ancestors()
9399
.use_commit_graph(toggle)
94-
.sorting(Sorting::ByCommitTimeNewestFirst)
100+
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
95101
.all()?
96102
.map(|c| c.map(|c| c.id))
97103
.collect::<Result<_, _>>()?;

0 commit comments

Comments
 (0)
Please sign in to comment.