upstream/mercurial-mirror Commit - r44494:2abffea4

1

// discovery.rs

1

// discovery.rs

2

//

2

//

3

4

//

4

//

5

// This software may be used and distributed according to the terms of the

5

// This software may be used and distributed according to the terms of the

6

// GNU General Public License version 2 or any later version.

6

// GNU General Public License version 2 or any later version.

7

8

//! Discovery operations

8

//! Discovery operations

9

//!

9

//!

10

//! This is a Rust counterpart to the `partialdiscovery` class of

10

//! This is a Rust counterpart to the `partialdiscovery` class of

11

//! `mercurial.setdiscovery`

11

//! `mercurial.setdiscovery`

12

13

use super::{Graph, GraphError, Revision, NULL_REVISION};

13

use super::{Graph, GraphError, Revision, NULL_REVISION};

14

use crate::{ancestors::MissingAncestors, dagops, FastHashMap};

14

use crate::{ancestors::MissingAncestors, dagops, FastHashMap};

15

use rand::seq::SliceRandom;

15

use rand::seq::SliceRandom;

16

use rand::{thread_rng, RngCore, SeedableRng};

16

use rand::{thread_rng, RngCore, SeedableRng};

17

use std::cmp::{max, min};

17

use std::cmp::{max, min};

18

use std::collections::{HashSet, VecDeque};

18

use std::collections::{HashSet, VecDeque};

19

20

type Rng = rand_pcg::Pcg32;

20

type Rng = rand_pcg::Pcg32;

21

type Seed = [u8; 16];

21

22

pub struct PartialDiscovery<G: Graph + Clone> {

23

pub struct PartialDiscovery<G: Graph + Clone> {

23

target_heads: Option<Vec<Revision>>,

24

target_heads: Option<Vec<Revision>>,

24

graph: G, // plays the role of self._repo

25

graph: G, // plays the role of self._repo

25

common: MissingAncestors<G>,

26

common: MissingAncestors<G>,

26

undecided: Option<HashSet<Revision>>,

27

undecided: Option<HashSet<Revision>>,

27

children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,

28

children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,

28

missing: HashSet<Revision>,

29

missing: HashSet<Revision>,

29

rng: Rng,

30

rng: Rng,

30

respect_size: bool,

31

respect_size: bool,

31

randomize: bool,

32

randomize: bool,

32

}

33

}

33

34

pub struct DiscoveryStats {

35

pub struct DiscoveryStats {

35

pub undecided: Option<usize>,

36

pub undecided: Option<usize>,

36

}

37

}

37

38

/// Update an existing sample to match the expected size

39

/// Update an existing sample to match the expected size

39

///

40

///

40

/// The sample is updated with revisions exponentially distant from each

41

/// The sample is updated with revisions exponentially distant from each

41

/// element of `heads`.

42

/// element of `heads`.

42

///

43

///

43

/// If a target size is specified, the sampling will stop once this size is

44

/// If a target size is specified, the sampling will stop once this size is

44

/// reached. Otherwise sampling will happen until roots of the <revs> set are

45

/// reached. Otherwise sampling will happen until roots of the <revs> set are

45

/// reached.

46

/// reached.

46

///

47

///

47

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

48

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

48

/// represented by `parentfn`

49

/// represented by `parentfn`

49

/// - `heads`: set of DAG head revs

50

/// - `heads`: set of DAG head revs

50

/// - `sample`: a sample to update

51

/// - `sample`: a sample to update

51

/// - `parentfn`: a callable to resolve parents for a revision

52

/// - `parentfn`: a callable to resolve parents for a revision

52

/// - `quicksamplesize`: optional target size of the sample

53

/// - `quicksamplesize`: optional target size of the sample

53

fn update_sample<I>(

54

fn update_sample<I>(

54

revs: Option<&HashSet<Revision>>,

55

revs: Option<&HashSet<Revision>>,

55

heads: impl IntoIterator<Item = Revision>,

56

heads: impl IntoIterator<Item = Revision>,

56

sample: &mut HashSet<Revision>,

57

sample: &mut HashSet<Revision>,

57

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

58

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

58

quicksamplesize: Option<usize>,

59

quicksamplesize: Option<usize>,

59

) -> Result<(), GraphError>

60

) -> Result<(), GraphError>

60

where

61

where

61

I: Iterator<Item = Revision>,

62

I: Iterator<Item = Revision>,

62

{

63

{

63

let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();

64

let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();

64

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

65

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

65

let mut factor: u32 = 1;

66

let mut factor: u32 = 1;

66

let mut seen: HashSet<Revision> = HashSet::new();

67

let mut seen: HashSet<Revision> = HashSet::new();

67

while let Some(current) = visit.pop_front() {

68

while let Some(current) = visit.pop_front() {

68

if !seen.insert(current) {

69

if !seen.insert(current) {

69

continue;

70

continue;

70

}

71

}

71

72

let d = *distances.entry(current).or_insert(1);

73

let d = *distances.entry(current).or_insert(1);

73

if d > factor {

74

if d > factor {

74

factor *= 2;

75

factor *= 2;

75

}

76

}

76

if d == factor {

77

if d == factor {

77

sample.insert(current);

78

sample.insert(current);

78

if let Some(sz) = quicksamplesize {

79

if let Some(sz) = quicksamplesize {

79

if sample.len() >= sz {

80

if sample.len() >= sz {

80

return Ok(());

81

return Ok(());

81

}

82

}

82

}

83

}

83

}

84

}

84

for p in parentsfn(current)? {

85

for p in parentsfn(current)? {

85

if let Some(revs) = revs {

86

if let Some(revs) = revs {

86

if !revs.contains(&p) {

87

if !revs.contains(&p) {

87

continue;

88

continue;

88

}

89

}

89

}

90

}

90

distances.entry(p).or_insert(d + 1);

91

distances.entry(p).or_insert(d + 1);

91

visit.push_back(p);

92

visit.push_back(p);

92

}

93

}

93

}

94

}

94

Ok(())

95

Ok(())

95

}

96

}

96

97

struct ParentsIterator {

98

struct ParentsIterator {

98

parents: [Revision; 2],

99

parents: [Revision; 2],

99

cur: usize,

100

cur: usize,

100

}

101

}

101

102

impl ParentsIterator {

103

impl ParentsIterator {

103

fn graph_parents(

104

fn graph_parents(

104

graph: &impl Graph,

105

graph: &impl Graph,

105

r: Revision,

106

r: Revision,

106

) -> Result<ParentsIterator, GraphError> {

107

) -> Result<ParentsIterator, GraphError> {

107

Ok(ParentsIterator {

108

Ok(ParentsIterator {

108

parents: graph.parents(r)?,

109

parents: graph.parents(r)?,

109

cur: 0,

110

cur: 0,

110

})

111

})

111

}

112

}

112

}

113

}

113

114

impl Iterator for ParentsIterator {

115

impl Iterator for ParentsIterator {

115

type Item = Revision;

116

type Item = Revision;

116

117

fn next(&mut self) -> Option<Revision> {

118

fn next(&mut self) -> Option<Revision> {

118

if self.cur > 1 {

119

if self.cur > 1 {

119

return None;

120

return None;

120

}

121

}

121

let rev = self.parents[self.cur];

122

let rev = self.parents[self.cur];

122

self.cur += 1;

123

self.cur += 1;

123

if rev == NULL_REVISION {

124

if rev == NULL_REVISION {

124

return self.next();

125

return self.next();

125

}

126

}

126

Some(rev)

127

Some(rev)

127

}

128

}

128

}

129

}

129

130

impl<G: Graph + Clone> PartialDiscovery<G> {

131

impl<G: Graph + Clone> PartialDiscovery<G> {

131

/// Create a PartialDiscovery object, with the intent

132

/// Create a PartialDiscovery object, with the intent

132

/// of comparing our `::<target_heads>` revset to the contents of another

133

/// of comparing our `::<target_heads>` revset to the contents of another

133

/// repo.

134

/// repo.

134

///

135

///

135

/// For now `target_heads` is passed as a vector, and will be used

136

/// For now `target_heads` is passed as a vector, and will be used

136

/// at the first call to `ensure_undecided()`.

137

/// at the first call to `ensure_undecided()`.

137

///

138

///

138

/// If we want to make the signature more flexible,

139

/// If we want to make the signature more flexible,

139

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

140

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

140

/// object since we'll keep it in the meanwhile

141

/// object since we'll keep it in the meanwhile

141

///

142

///

142

/// The `respect_size` boolean controls how the sampling methods

143

/// The `respect_size` boolean controls how the sampling methods

143

/// will interpret the size argument requested by the caller. If it's

144

/// will interpret the size argument requested by the caller. If it's

144

/// `false`, they are allowed to produce a sample whose size is more

145

/// `false`, they are allowed to produce a sample whose size is more

145

/// appropriate to the situation (typically bigger).

146

/// appropriate to the situation (typically bigger).

146

///

147

///

147

/// The `randomize` boolean affects sampling, and specifically how

148

/// The `randomize` boolean affects sampling, and specifically how

148

/// limiting or last-minute expanding is been done:

149

/// limiting or last-minute expanding is been done:

149

///

150

///

150

/// If `true`, both will perform random picking from `self.undecided`.

151

/// If `true`, both will perform random picking from `self.undecided`.

151

/// This is currently the best for actual discoveries.

152

/// This is currently the best for actual discoveries.

152

///

153

///

153

/// If `false`, a reproductible picking strategy is performed. This is

154

/// If `false`, a reproductible picking strategy is performed. This is

154

/// useful for integration tests.

155

/// useful for integration tests.

155

pub fn new(

156

pub fn new(

156

graph: G,

157

graph: G,

157

target_heads: Vec<Revision>,

158

target_heads: Vec<Revision>,

158

respect_size: bool,

159

respect_size: bool,

159

randomize: bool,

160

randomize: bool,

160

) -> Self {

161

) -> Self {

161

let mut seed: [u8; 16] = [0; 16];

162

let mut seed = [0; 16];

162

if randomize {

163

if randomize {

163

thread_rng().fill_bytes(&mut seed);

164

thread_rng().fill_bytes(&mut seed);

164

}

165

}

165

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

166

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

166

}

167

}

167

168

pub fn new_with_seed(

169

pub fn new_with_seed(

169

graph: G,

170

graph: G,

170

target_heads: Vec<Revision>,

171

target_heads: Vec<Revision>,

171

seed: [u8; 16],

172

seed: Seed,

172

respect_size: bool,

173

respect_size: bool,

173

randomize: bool,

174

randomize: bool,

174

) -> Self {

175

) -> Self {

175

PartialDiscovery {

176

PartialDiscovery {

176

undecided: None,

177

undecided: None,

177

children_cache: None,

178

children_cache: None,

178

target_heads: Some(target_heads),

179

target_heads: Some(target_heads),

179

graph: graph.clone(),

180

graph: graph.clone(),

180

common: MissingAncestors::new(graph, vec![]),

181

common: MissingAncestors::new(graph, vec![]),

181

missing: HashSet::new(),

182

missing: HashSet::new(),

182

rng: Rng::from_seed(seed),

183

rng: Rng::from_seed(seed),

183

respect_size: respect_size,

184

respect_size: respect_size,

184

randomize: randomize,

185

randomize: randomize,

185

}

186

}

186

}

187

}

187

188

/// Extract at most `size` random elements from sample and return them

189

/// Extract at most `size` random elements from sample and return them

189

/// as a vector

190

/// as a vector

190

fn limit_sample(

191

fn limit_sample(

191

&mut self,

192

&mut self,

192

mut sample: Vec<Revision>,

193

mut sample: Vec<Revision>,

193

size: usize,

194

size: usize,

194

) -> Vec<Revision> {

195

) -> Vec<Revision> {

195

if !self.randomize {

196

if !self.randomize {

196

sample.sort();

197

sample.sort();

197

sample.truncate(size);

198

sample.truncate(size);

198

return sample;

199

return sample;

199

}

200

}

200

let sample_len = sample.len();

201

let sample_len = sample.len();

201

if sample_len <= size {

202

if sample_len <= size {

202

return sample;

203

return sample;

203

}

204

}

204

let rng = &mut self.rng;

205

let rng = &mut self.rng;

205

let dropped_size = sample_len - size;

206

let dropped_size = sample_len - size;

206

let limited_slice = if size < dropped_size {

207

let limited_slice = if size < dropped_size {

207

sample.partial_shuffle(rng, size).0

208

sample.partial_shuffle(rng, size).0

208

} else {

209

} else {

209

sample.partial_shuffle(rng, dropped_size).1

210

sample.partial_shuffle(rng, dropped_size).1

210

};

211

};

211

limited_slice.to_owned()

212

limited_slice.to_owned()

212

}

213

}

213

214

/// Register revisions known as being common

215

/// Register revisions known as being common

215

pub fn add_common_revisions(

216

pub fn add_common_revisions(

216

&mut self,

217

&mut self,

217

common: impl IntoIterator<Item = Revision>,

218

common: impl IntoIterator<Item = Revision>,

218

) -> Result<(), GraphError> {

219

) -> Result<(), GraphError> {

219

let before_len = self.common.get_bases().len();

220

let before_len = self.common.get_bases().len();

220

self.common.add_bases(common);

221

self.common.add_bases(common);

221

if self.common.get_bases().len() == before_len {

222

if self.common.get_bases().len() == before_len {

222

return Ok(());

223

return Ok(());

223

}

224

}

224

if let Some(ref mut undecided) = self.undecided {

225

if let Some(ref mut undecided) = self.undecided {

225

self.common.remove_ancestors_from(undecided)?;

226

self.common.remove_ancestors_from(undecided)?;

226

}

227

}

227

Ok(())

228

Ok(())

228

}

229

}

229

230

/// Register revisions known as being missing

231

/// Register revisions known as being missing

231

///

232

///

232

/// # Performance note

233

/// # Performance note

233

///

234

///

234

/// Except in the most trivial case, the first call of this method has

235

/// Except in the most trivial case, the first call of this method has

235

/// the side effect of computing `self.undecided` set for the first time,

236

/// the side effect of computing `self.undecided` set for the first time,

236

/// and the related caches it might need for efficiency of its internal

237

/// and the related caches it might need for efficiency of its internal

237

/// computation. This is typically faster if more information is

238

/// computation. This is typically faster if more information is

238

/// available in `self.common`. Therefore, for good performance, the

239

/// available in `self.common`. Therefore, for good performance, the

239

/// caller should avoid calling this too early.

240

/// caller should avoid calling this too early.

240

pub fn add_missing_revisions(

241

pub fn add_missing_revisions(

241

&mut self,

242

&mut self,

242

missing: impl IntoIterator<Item = Revision>,

243

missing: impl IntoIterator<Item = Revision>,

243

) -> Result<(), GraphError> {

244

) -> Result<(), GraphError> {

244

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

245

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

245

if tovisit.is_empty() {

246

if tovisit.is_empty() {

246

return Ok(());

247

return Ok(());

247

}

248

}

248

self.ensure_children_cache()?;

249

self.ensure_children_cache()?;

249

self.ensure_undecided()?; // for safety of possible future refactors

250

self.ensure_undecided()?; // for safety of possible future refactors

250

let children = self.children_cache.as_ref().unwrap();

251

let children = self.children_cache.as_ref().unwrap();

251

let mut seen: HashSet<Revision> = HashSet::new();

252

let mut seen: HashSet<Revision> = HashSet::new();

252

let undecided_mut = self.undecided.as_mut().unwrap();

253

let undecided_mut = self.undecided.as_mut().unwrap();

253

while let Some(rev) = tovisit.pop_front() {

254

while let Some(rev) = tovisit.pop_front() {

254

if !self.missing.insert(rev) {

255

if !self.missing.insert(rev) {

255

// either it's known to be missing from a previous

256

// either it's known to be missing from a previous

256

// invocation, and there's no need to iterate on its

257

// invocation, and there's no need to iterate on its

257

// children (we now they are all missing)

258

// children (we now they are all missing)

258

// or it's from a previous iteration of this loop

259

// or it's from a previous iteration of this loop

259

// and its children have already been queued

260

// and its children have already been queued

260

continue;

261

continue;

261

}

262

}

262

undecided_mut.remove(&rev);

263

undecided_mut.remove(&rev);

263

match children.get(&rev) {

264

match children.get(&rev) {

264

None => {

265

None => {

265

continue;

266

continue;

266

}

267

}

267

Some(this_children) => {

268

Some(this_children) => {

268

for child in this_children.iter().cloned() {

269

for child in this_children.iter().cloned() {

269

if seen.insert(child) {

270

if seen.insert(child) {

270

tovisit.push_back(child);

271

tovisit.push_back(child);

271

}

272

}

272

}

273

}

273

}

274

}

274

}

275

}

275

}

276

}

276

Ok(())

277

Ok(())

277

}

278

}

278

279

/// Do we have any information about the peer?

280

/// Do we have any information about the peer?

280

pub fn has_info(&self) -> bool {

281

pub fn has_info(&self) -> bool {

281

self.common.has_bases()

282

self.common.has_bases()

282

}

283

}

283

284

/// Did we acquire full knowledge of our Revisions that the peer has?

285

/// Did we acquire full knowledge of our Revisions that the peer has?

285

pub fn is_complete(&self) -> bool {

286

pub fn is_complete(&self) -> bool {

286

self.undecided.as_ref().map_or(false, |s| s.is_empty())

287

self.undecided.as_ref().map_or(false, |s| s.is_empty())

287

}

288

}

288

289

/// Return the heads of the currently known common set of revisions.

290

/// Return the heads of the currently known common set of revisions.

290

///

291

///

291

/// If the discovery process is not complete (see `is_complete()`), the

292

/// If the discovery process is not complete (see `is_complete()`), the

292

/// caller must be aware that this is an intermediate state.

293

/// caller must be aware that this is an intermediate state.

293

///

294

///

294

/// On the other hand, if it is complete, then this is currently

295

/// On the other hand, if it is complete, then this is currently

295

/// the only way to retrieve the end results of the discovery process.

296

/// the only way to retrieve the end results of the discovery process.

296

///

297

///

297

/// We may introduce in the future an `into_common_heads` call that

298

/// We may introduce in the future an `into_common_heads` call that

298

/// would be more appropriate for normal Rust callers, dropping `self`

299

/// would be more appropriate for normal Rust callers, dropping `self`

299

/// if it is complete.

300

/// if it is complete.

300

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

301

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

301

self.common.bases_heads()

302

self.common.bases_heads()

302

}

303

}

303

304

/// Force first computation of `self.undecided`

305

/// Force first computation of `self.undecided`

305

///

306

///

306

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

307

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

307

/// unwrapped to get workable immutable or mutable references without

308

/// unwrapped to get workable immutable or mutable references without

308

/// any panic.

309

/// any panic.

309

///

310

///

310

/// This is an imperative call instead of an access with added lazyness

311

/// This is an imperative call instead of an access with added lazyness

311

/// to reduce easily the scope of mutable borrow for the caller,

312

/// to reduce easily the scope of mutable borrow for the caller,

312

/// compared to undecided(&'a mut self) -> &'a… that would keep it

313

/// compared to undecided(&'a mut self) -> &'a… that would keep it

313

/// as long as the resulting immutable one.

314

/// as long as the resulting immutable one.

314

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

315

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

315

if self.undecided.is_some() {

316

if self.undecided.is_some() {

316

return Ok(());

317

return Ok(());

317

}

318

}

318

let tgt = self.target_heads.take().unwrap();

319

let tgt = self.target_heads.take().unwrap();

319

self.undecided =

320

self.undecided =

320

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

321

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

321

Ok(())

322

Ok(())

322

}

323

}

323

324

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

325

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

325

if self.children_cache.is_some() {

326

if self.children_cache.is_some() {

326

return Ok(());

327

return Ok(());

327

}

328

}

328

self.ensure_undecided()?;

329

self.ensure_undecided()?;

329

330

let mut children: FastHashMap<Revision, Vec<Revision>> =

331

let mut children: FastHashMap<Revision, Vec<Revision>> =

331

FastHashMap::default();

332

FastHashMap::default();

332

for &rev in self.undecided.as_ref().unwrap() {

333

for &rev in self.undecided.as_ref().unwrap() {

333

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

334

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

334

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

335

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

335

}

336

}

336

}

337

}

337

self.children_cache = Some(children);

338

self.children_cache = Some(children);

338

Ok(())

339

Ok(())

339

}

340

}

340

341

/// Provide statistics about the current state of the discovery process

342

/// Provide statistics about the current state of the discovery process

342

pub fn stats(&self) -> DiscoveryStats {

343

pub fn stats(&self) -> DiscoveryStats {

343

DiscoveryStats {

344

DiscoveryStats {

344

undecided: self.undecided.as_ref().map(|s| s.len()),

345

undecided: self.undecided.as_ref().map(|s| s.len()),

345

}

346

}

346

}

347

}

347

348

pub fn take_quick_sample(

349

pub fn take_quick_sample(

349

&mut self,

350

&mut self,

350

headrevs: impl IntoIterator<Item = Revision>,

351

headrevs: impl IntoIterator<Item = Revision>,

351

size: usize,

352

size: usize,

352

) -> Result<Vec<Revision>, GraphError> {

353

) -> Result<Vec<Revision>, GraphError> {

353

self.ensure_undecided()?;

354

self.ensure_undecided()?;

354

let mut sample = {

355

let mut sample = {

355

let undecided = self.undecided.as_ref().unwrap();

356

let undecided = self.undecided.as_ref().unwrap();

356

if undecided.len() <= size {

357

if undecided.len() <= size {

357

return Ok(undecided.iter().cloned().collect());

358

return Ok(undecided.iter().cloned().collect());

358

}

359

}

359

dagops::heads(&self.graph, undecided.iter())?

360

dagops::heads(&self.graph, undecided.iter())?

360

};

361

};

361

if sample.len() >= size {

362

if sample.len() >= size {

362

return Ok(self.limit_sample(sample.into_iter().collect(), size));

363

return Ok(self.limit_sample(sample.into_iter().collect(), size));

363

}

364

}

364

update_sample(

365

update_sample(

365

None,

366

None,

366

headrevs,

367

headrevs,

367

&mut sample,

368

&mut sample,

368

|r| ParentsIterator::graph_parents(&self.graph, r),

369

|r| ParentsIterator::graph_parents(&self.graph, r),

369

Some(size),

370

Some(size),

370

)?;

371

)?;

371

Ok(sample.into_iter().collect())

372

Ok(sample.into_iter().collect())

372

}

373

}

373

374

/// Extract a sample from `self.undecided`, going from its heads and roots.

375

/// Extract a sample from `self.undecided`, going from its heads and roots.

375

///

376

///

376

/// The `size` parameter is used to avoid useless computations if

377

/// The `size` parameter is used to avoid useless computations if

377

/// it turns out to be bigger than the whole set of undecided Revisions.

378

/// it turns out to be bigger than the whole set of undecided Revisions.

378

///

379

///

379

/// The sample is taken by using `update_sample` from the heads, then

380

/// The sample is taken by using `update_sample` from the heads, then

380

/// from the roots, working on the reverse DAG,

381

/// from the roots, working on the reverse DAG,

381

/// expressed by `self.children_cache`.

382

/// expressed by `self.children_cache`.

382

///

383

///

383

/// No effort is being made to complete or limit the sample to `size`

384

/// No effort is being made to complete or limit the sample to `size`

384

/// but this method returns another interesting size that it derives

385

/// but this method returns another interesting size that it derives

385

/// from its knowledge of the structure of the various sets, leaving

386

/// from its knowledge of the structure of the various sets, leaving

386

/// to the caller the decision to use it or not.

387

/// to the caller the decision to use it or not.

387

fn bidirectional_sample(

388

fn bidirectional_sample(

388

&mut self,

389

&mut self,

389

size: usize,

390

size: usize,

390

) -> Result<(HashSet<Revision>, usize), GraphError> {

391

) -> Result<(HashSet<Revision>, usize), GraphError> {

391

self.ensure_undecided()?;

392

self.ensure_undecided()?;

392

{

393

{

393

// we don't want to compute children_cache before this

394

// we don't want to compute children_cache before this

394

// but doing it after extracting self.undecided takes a mutable

395

// but doing it after extracting self.undecided takes a mutable

395

// ref to self while a shareable one is still active.

396

// ref to self while a shareable one is still active.

396

let undecided = self.undecided.as_ref().unwrap();

397

let undecided = self.undecided.as_ref().unwrap();

397

if undecided.len() <= size {

398

if undecided.len() <= size {

398

return Ok((undecided.clone(), size));

399

return Ok((undecided.clone(), size));

399

}

400

}

400

}

401

}

401

402

self.ensure_children_cache()?;

403

self.ensure_children_cache()?;

403

let revs = self.undecided.as_ref().unwrap();

404

let revs = self.undecided.as_ref().unwrap();

404

let mut sample: HashSet<Revision> = revs.clone();

405

let mut sample: HashSet<Revision> = revs.clone();

405

406

// it's possible that leveraging the children cache would be more

407

// it's possible that leveraging the children cache would be more

407

// efficient here

408

// efficient here

408

dagops::retain_heads(&self.graph, &mut sample)?;

409

dagops::retain_heads(&self.graph, &mut sample)?;

409

let revsheads = sample.clone(); // was again heads(revs) in python

410

let revsheads = sample.clone(); // was again heads(revs) in python

410

411

// update from heads

412

// update from heads

412

update_sample(

413

update_sample(

413

Some(revs),

414

Some(revs),

414

revsheads.iter().cloned(),

415

revsheads.iter().cloned(),

415

&mut sample,

416

&mut sample,

416

|r| ParentsIterator::graph_parents(&self.graph, r),

417

|r| ParentsIterator::graph_parents(&self.graph, r),

417

None,

418

None,

418

)?;

419

)?;

419

420

// update from roots

421

// update from roots

421

let revroots: HashSet<Revision> =

422

let revroots: HashSet<Revision> =

422

dagops::roots(&self.graph, revs)?.into_iter().collect();

423

dagops::roots(&self.graph, revs)?.into_iter().collect();

423

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

424

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

424

425

let children = self.children_cache.as_ref().unwrap();

426

let children = self.children_cache.as_ref().unwrap();

426

let empty_vec: Vec<Revision> = Vec::new();

427

let empty_vec: Vec<Revision> = Vec::new();

427

update_sample(

428

update_sample(

428

Some(revs),

429

Some(revs),

429

revroots,

430

revroots,

430

&mut sample,

431

&mut sample,

431

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

432

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

432

None,

433

None,

433

)?;

434

)?;

434

Ok((sample, prescribed_size))

435

Ok((sample, prescribed_size))

435

}

436

}

436

437

/// Fill up sample up to the wished size with random undecided Revisions.

438

/// Fill up sample up to the wished size with random undecided Revisions.

438

///

439

///

439

/// This is intended to be used as a last resort completion if the

440

/// This is intended to be used as a last resort completion if the

440

/// regular sampling algorithm returns too few elements.

441

/// regular sampling algorithm returns too few elements.

441

fn random_complete_sample(

442

fn random_complete_sample(

442

&mut self,

443

&mut self,

443

sample: &mut Vec<Revision>,

444

sample: &mut Vec<Revision>,

444

size: usize,

445

size: usize,

445

) {

446

) {

446

let sample_len = sample.len();

447

let sample_len = sample.len();

447

if size <= sample_len {

448

if size <= sample_len {

448

return;

449

return;

449

}

450

}

450

let take_from: Vec<Revision> = self

451

let take_from: Vec<Revision> = self

451

.undecided

452

.undecided

452

.as_ref()

453

.as_ref()

453

.unwrap()

454

.unwrap()

454

.iter()

455

.iter()

455

.filter(|&r| !sample.contains(r))

456

.filter(|&r| !sample.contains(r))

456

.cloned()

457

.cloned()

457

.collect();

458

.collect();

458

sample.extend(self.limit_sample(take_from, size - sample_len));

459

sample.extend(self.limit_sample(take_from, size - sample_len));

459

}

460

}

460

461

pub fn take_full_sample(

462

pub fn take_full_sample(

462

&mut self,

463

&mut self,

463

size: usize,

464

size: usize,

464

) -> Result<Vec<Revision>, GraphError> {

465

) -> Result<Vec<Revision>, GraphError> {

465

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

466

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

466

let size = if self.respect_size {

467

let size = if self.respect_size {

467

size

468

size

468

} else {

469

} else {

469

prescribed_size

470

prescribed_size

470

};

471

};

471

let mut sample =

472

let mut sample =

472

self.limit_sample(sample_set.into_iter().collect(), size);

473

self.limit_sample(sample_set.into_iter().collect(), size);

473

self.random_complete_sample(&mut sample, size);

474

self.random_complete_sample(&mut sample, size);

474

Ok(sample)

475

Ok(sample)

475

}

476

}

476

}

477

}

477

478

#[cfg(test)]

479

#[cfg(test)]

479

mod tests {

480

mod tests {

480

use super::*;

481

use super::*;

481

use crate::testing::SampleGraph;

482

use crate::testing::SampleGraph;

482

483

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

484

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

484

///

485

///

485

/// To avoid actual randomness in these tests, we give it a fixed

486

/// To avoid actual randomness in these tests, we give it a fixed

486

/// random seed, but by default we'll test the random version.

487

/// random seed, but by default we'll test the random version.

487

fn full_disco() -> PartialDiscovery<SampleGraph> {

488

fn full_disco() -> PartialDiscovery<SampleGraph> {

488

PartialDiscovery::new_with_seed(

489

PartialDiscovery::new_with_seed(

489

SampleGraph,

490

SampleGraph,

490

vec![10, 11, 12, 13],

491

vec![10, 11, 12, 13],

491

[0; 16],

492

[0; 16],

492

true,

493

true,

493

true,

494

true,

494

)

495

)

495

}

496

}

496

497

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

498

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

498

///

499

///

499

/// To avoid actual randomness in tests, we give it a fixed random seed.

500

/// To avoid actual randomness in tests, we give it a fixed random seed.

500

fn disco12() -> PartialDiscovery<SampleGraph> {

501

fn disco12() -> PartialDiscovery<SampleGraph> {

501

PartialDiscovery::new_with_seed(

502

PartialDiscovery::new_with_seed(

502

SampleGraph,

503

SampleGraph,

503

vec![12],

504

vec![12],

504

[0; 16],

505

[0; 16],

505

true,

506

true,

506

true,

507

true,

507

)

508

)

508

}

509

}

509

510

fn sorted_undecided(

511

fn sorted_undecided(

511

disco: &PartialDiscovery<SampleGraph>,

512

disco: &PartialDiscovery<SampleGraph>,

512

) -> Vec<Revision> {

513

) -> Vec<Revision> {

513

let mut as_vec: Vec<Revision> =

514

let mut as_vec: Vec<Revision> =

514

disco.undecided.as_ref().unwrap().iter().cloned().collect();

515

disco.undecided.as_ref().unwrap().iter().cloned().collect();

515

as_vec.sort();

516

as_vec.sort();

516

as_vec

517

as_vec

517

}

518

}

518

519

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

520

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

520

let mut as_vec: Vec<Revision> =

521

let mut as_vec: Vec<Revision> =

521

disco.missing.iter().cloned().collect();

522

disco.missing.iter().cloned().collect();

522

as_vec.sort();

523

as_vec.sort();

523

as_vec

524

as_vec

524

}

525

}

525

526

fn sorted_common_heads(

527

fn sorted_common_heads(

527

disco: &PartialDiscovery<SampleGraph>,

528

disco: &PartialDiscovery<SampleGraph>,

528

) -> Result<Vec<Revision>, GraphError> {

529

) -> Result<Vec<Revision>, GraphError> {

529

let mut as_vec: Vec<Revision> =

530

let mut as_vec: Vec<Revision> =

530

disco.common_heads()?.iter().cloned().collect();

531

disco.common_heads()?.iter().cloned().collect();

531

as_vec.sort();

532

as_vec.sort();

532

Ok(as_vec)

533

Ok(as_vec)

533

}

534

}

534

535

#[test]

536

#[test]

536

fn test_add_common_get_undecided() -> Result<(), GraphError> {

537

fn test_add_common_get_undecided() -> Result<(), GraphError> {

537

let mut disco = full_disco();

538

let mut disco = full_disco();

538

assert_eq!(disco.undecided, None);

539

assert_eq!(disco.undecided, None);

539

assert!(!disco.has_info());

540

assert!(!disco.has_info());

540

assert_eq!(disco.stats().undecided, None);

541

assert_eq!(disco.stats().undecided, None);

541

542

disco.add_common_revisions(vec![11, 12])?;

543

disco.add_common_revisions(vec![11, 12])?;

543

assert!(disco.has_info());

544

assert!(disco.has_info());

544

assert!(!disco.is_complete());

545

assert!(!disco.is_complete());

545

assert!(disco.missing.is_empty());

546

assert!(disco.missing.is_empty());

546

547

// add_common_revisions did not trigger a premature computation

548

// add_common_revisions did not trigger a premature computation

548

// of `undecided`, let's check that and ask for them

549

// of `undecided`, let's check that and ask for them

549

assert_eq!(disco.undecided, None);

550

assert_eq!(disco.undecided, None);

550

disco.ensure_undecided()?;

551

disco.ensure_undecided()?;

551

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

552

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

552

assert_eq!(disco.stats().undecided, Some(4));

553

assert_eq!(disco.stats().undecided, Some(4));

553

Ok(())

554

Ok(())

554

}

555

}

555

556

/// in this test, we pretend that our peer misses exactly (8+10)::

557

/// in this test, we pretend that our peer misses exactly (8+10)::

557

/// and we're comparing all our repo to it (as in a bare push)

558

/// and we're comparing all our repo to it (as in a bare push)

558

#[test]

559

#[test]

559

fn test_discovery() -> Result<(), GraphError> {

560

fn test_discovery() -> Result<(), GraphError> {

560

let mut disco = full_disco();

561

let mut disco = full_disco();

561

disco.add_common_revisions(vec![11, 12])?;

562

disco.add_common_revisions(vec![11, 12])?;

562

disco.add_missing_revisions(vec![8, 10])?;

563

disco.add_missing_revisions(vec![8, 10])?;

563

assert_eq!(sorted_undecided(&disco), vec![5]);

564

assert_eq!(sorted_undecided(&disco), vec![5]);

564

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

565

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

565

assert!(!disco.is_complete());

566

assert!(!disco.is_complete());

566

567

disco.add_common_revisions(vec![5])?;

568

disco.add_common_revisions(vec![5])?;

568

assert_eq!(sorted_undecided(&disco), vec![]);

569

assert_eq!(sorted_undecided(&disco), vec![]);

569

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

570

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

570

assert!(disco.is_complete());

571

assert!(disco.is_complete());

571

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

572

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

572

Ok(())

573

Ok(())

573

}

574

}

574

575

#[test]

576

#[test]

576

fn test_add_missing_early_continue() -> Result<(), GraphError> {

577

fn test_add_missing_early_continue() -> Result<(), GraphError> {

577

eprintln!("test_add_missing_early_stop");

578

eprintln!("test_add_missing_early_stop");

578

let mut disco = full_disco();

579

let mut disco = full_disco();

579

disco.add_common_revisions(vec![13, 3, 4])?;

580

disco.add_common_revisions(vec![13, 3, 4])?;

580

disco.ensure_children_cache()?;

581

disco.ensure_children_cache()?;

581

// 12 is grand-child of 6 through 9

582

// 12 is grand-child of 6 through 9

582

// passing them in this order maximizes the chances of the

583

// passing them in this order maximizes the chances of the

583

// early continue to do the wrong thing

584

// early continue to do the wrong thing

584

disco.add_missing_revisions(vec![6, 9, 12])?;

585

disco.add_missing_revisions(vec![6, 9, 12])?;

585

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

586

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

586

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

587

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

587

assert!(!disco.is_complete());

588

assert!(!disco.is_complete());

588

Ok(())

589

Ok(())

589

}

590

}

590

591

#[test]

592

#[test]

592

fn test_limit_sample_no_need_to() {

593

fn test_limit_sample_no_need_to() {

593

let sample = vec![1, 2, 3, 4];

594

let sample = vec![1, 2, 3, 4];

594

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

595

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

595

}

596

}

596

597

#[test]

598

#[test]

598

fn test_limit_sample_less_than_half() {

599

fn test_limit_sample_less_than_half() {

599

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

600

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

600

}

601

}

601

602

#[test]

603

#[test]

603

fn test_limit_sample_more_than_half() {

604

fn test_limit_sample_more_than_half() {

604

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

605

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

605

}

606

}

606

607

#[test]

608

#[test]

608

fn test_limit_sample_no_random() {

609

fn test_limit_sample_no_random() {

609

let mut disco = full_disco();

610

let mut disco = full_disco();

610

disco.randomize = false;

611

disco.randomize = false;

611

assert_eq!(

612

assert_eq!(

612

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

613

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

613

vec![1, 3, 5, 7]

614

vec![1, 3, 5, 7]

614

);

615

);

615

}

616

}

616

617

#[test]

618

#[test]

618

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

619

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

619

let mut disco = full_disco();

620

let mut disco = full_disco();

620

disco.undecided = Some((1..=13).collect());

621

disco.undecided = Some((1..=13).collect());

621

622

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

623

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

623

sample_vec.sort();

624

sample_vec.sort();

624

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

625

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

625

Ok(())

626

Ok(())

626

}

627

}

627

628

#[test]

629

#[test]

629

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

630

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

630

let mut disco = disco12();

631

let mut disco = disco12();

631

disco.ensure_undecided()?;

632

disco.ensure_undecided()?;

632

633

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

634

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

634

sample_vec.sort();

635

sample_vec.sort();

635

// r12's only parent is r9, whose unique grand-parent through the

636

// r12's only parent is r9, whose unique grand-parent through the

636

// diamond shape is r4. This ends there because the distance from r4

637

// diamond shape is r4. This ends there because the distance from r4

637

// to the root is only 3.

638

// to the root is only 3.

638

assert_eq!(sample_vec, vec![4, 9, 12]);

639

assert_eq!(sample_vec, vec![4, 9, 12]);

639

Ok(())

640

Ok(())

640

}

641

}

641

642

#[test]

643

#[test]

643

fn test_children_cache() -> Result<(), GraphError> {

644

fn test_children_cache() -> Result<(), GraphError> {

644

let mut disco = full_disco();

645

let mut disco = full_disco();

645

disco.ensure_children_cache()?;

646

disco.ensure_children_cache()?;

646

647

let cache = disco.children_cache.unwrap();

648

let cache = disco.children_cache.unwrap();

648

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

649

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

649

assert_eq!(cache.get(&10).cloned(), None);

650

assert_eq!(cache.get(&10).cloned(), None);

650

651

let mut children_4 = cache.get(&4).cloned().unwrap();

652

let mut children_4 = cache.get(&4).cloned().unwrap();

652

children_4.sort();

653

children_4.sort();

653

assert_eq!(children_4, vec![5, 6, 7]);

654

assert_eq!(children_4, vec![5, 6, 7]);

654

655

let mut children_7 = cache.get(&7).cloned().unwrap();

656

let mut children_7 = cache.get(&7).cloned().unwrap();

656

children_7.sort();

657

children_7.sort();

657

assert_eq!(children_7, vec![9, 11]);

658

assert_eq!(children_7, vec![9, 11]);

658

659

Ok(())

660

Ok(())

660

}

661

}

661

662

#[test]

663

#[test]

663

fn test_complete_sample() {

664

fn test_complete_sample() {

664

let mut disco = full_disco();

665

let mut disco = full_disco();

665

let undecided: HashSet<Revision> =

666

let undecided: HashSet<Revision> =

666

[4, 7, 9, 2, 3].iter().cloned().collect();

667

[4, 7, 9, 2, 3].iter().cloned().collect();

667

disco.undecided = Some(undecided);

668

disco.undecided = Some(undecided);

668

669

let mut sample = vec![0];

670

let mut sample = vec![0];

670

disco.random_complete_sample(&mut sample, 3);

671

disco.random_complete_sample(&mut sample, 3);

671

assert_eq!(sample.len(), 3);

672

assert_eq!(sample.len(), 3);

672

673

let mut sample = vec![2, 4, 7];

674

let mut sample = vec![2, 4, 7];

674

disco.random_complete_sample(&mut sample, 1);

675

disco.random_complete_sample(&mut sample, 1);

675

assert_eq!(sample.len(), 3);

676

assert_eq!(sample.len(), 3);

676

}

677

}

677

678

#[test]

679

#[test]

679

fn test_bidirectional_sample() -> Result<(), GraphError> {

680

fn test_bidirectional_sample() -> Result<(), GraphError> {

680

let mut disco = full_disco();

681

let mut disco = full_disco();

681

disco.undecided = Some((0..=13).into_iter().collect());

682

disco.undecided = Some((0..=13).into_iter().collect());

682

683

let (sample_set, size) = disco.bidirectional_sample(7)?;

684

let (sample_set, size) = disco.bidirectional_sample(7)?;

684

assert_eq!(size, 7);

685

assert_eq!(size, 7);

685

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

686

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

686

sample.sort();

687

sample.sort();

687

// our DAG is a bit too small for the results to be really interesting

688

// our DAG is a bit too small for the results to be really interesting

688

// at least it shows that

689

// at least it shows that

689

// - we went both ways

690

// - we went both ways

690

// - we didn't take all Revisions (6 is not in the sample)

691

// - we didn't take all Revisions (6 is not in the sample)

691

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

692

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

692

Ok(())

693

Ok(())

693

}

694

}

694

}

695

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             // discovery.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Discovery operations
             //!
             //! This is a Rust counterpart to the `partialdiscovery` class of
             //! `mercurial.setdiscovery`
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
             use rand::seq::SliceRandom;
             use rand::{thread_rng, RngCore, SeedableRng};
             use std::cmp::{max, min};
             use std::collections::{HashSet, VecDeque};
             type Rng = rand_pcg::Pcg32;
+            type Seed = [u8; 16];
             pub struct PartialDiscovery<G: Graph + Clone> {
                 target_heads: Option<Vec<Revision>>,
                 graph: G, // plays the role of self._repo
                 common: MissingAncestors<G>,
                 undecided: Option<HashSet<Revision>>,
                 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
                 missing: HashSet<Revision>,
                 rng: Rng,
                 respect_size: bool,
                 randomize: bool,
             }
             pub struct DiscoveryStats {
                 pub undecided: Option<usize>,
             }
             /// Update an existing sample to match the expected size
             ///
             /// The sample is updated with revisions exponentially distant from each
             /// element of `heads`.
             ///
             /// If a target size is specified, the sampling will stop once this size is
             /// reached. Otherwise sampling will happen until roots of the <revs> set are
             /// reached.
             ///
             /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
             ///   represented by `parentfn`
             /// - `heads`: set of DAG head revs
             /// - `sample`: a sample to update
             /// - `parentfn`: a callable to resolve parents for a revision
             /// - `quicksamplesize`: optional target size of the sample
             fn update_sample<I>(
                 revs: Option<&HashSet<Revision>>,
                 heads: impl IntoIterator<Item = Revision>,
                 sample: &mut HashSet<Revision>,
                 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
                 quicksamplesize: Option<usize>,
             ) -> Result<(), GraphError>
             where
                 I: Iterator<Item = Revision>,
             {
                 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
                 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
                 let mut factor: u32 = 1;
                 let mut seen: HashSet<Revision> = HashSet::new();
                 while let Some(current) = visit.pop_front() {
                     if !seen.insert(current) {
                         continue;
                     }
                     let d = *distances.entry(current).or_insert(1);
                     if d > factor {
                         factor *= 2;
                     }
                     if d == factor {
                         sample.insert(current);
                         if let Some(sz) = quicksamplesize {
                             if sample.len() >= sz {
                                 return Ok(());
                             }
                         }
                     }
                     for p in parentsfn(current)? {
                         if let Some(revs) = revs {
                             if !revs.contains(&p) {
                                 continue;
                             }
                         }
                         distances.entry(p).or_insert(d + 1);
                         visit.push_back(p);
                     }
                 }
                 Ok(())
             }
             struct ParentsIterator {
                 parents: [Revision; 2],
                 cur: usize,
             }
             impl ParentsIterator {
                 fn graph_parents(
                     graph: &impl Graph,
                     r: Revision,
                 ) -> Result<ParentsIterator, GraphError> {
                     Ok(ParentsIterator {
                         parents: graph.parents(r)?,
                         cur: 0,
                     })
                 }
             }
             impl Iterator for ParentsIterator {
                 type Item = Revision;
                 fn next(&mut self) -> Option<Revision> {
                     if self.cur > 1 {
                         return None;
                     }
                     let rev = self.parents[self.cur];
                     self.cur += 1;
                     if rev == NULL_REVISION {
                         return self.next();
                     }
                     Some(rev)
                 }
             }
             impl<G: Graph + Clone> PartialDiscovery<G> {
                 /// Create a PartialDiscovery object, with the intent
                 /// of comparing our `::<target_heads>` revset to the contents of another
                 /// repo.
                 ///
                 /// For now `target_heads` is passed as a vector, and will be used
                 /// at the first call to `ensure_undecided()`.
                 ///
                 /// If we want to make the signature more flexible,
                 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
                 /// object since we'll keep it in the meanwhile
                 ///
                 /// The `respect_size` boolean controls how the sampling methods
                 /// will interpret the size argument requested by the caller. If it's
                 /// `false`, they are allowed to produce a sample whose size is more
                 /// appropriate to the situation (typically bigger).
                 ///
                 /// The `randomize` boolean affects sampling, and specifically how
                 /// limiting or last-minute expanding is been done:
                 ///
                 /// If `true`, both will perform random picking from `self.undecided`.
                 /// This is currently the best for actual discoveries.
                 ///
                 /// If `false`, a reproductible picking strategy is performed. This is
                 /// useful for integration tests.
                 pub fn new(
                     graph: G,
                     target_heads: Vec<Revision>,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
-                    let mut seed: [u8; 16] = [0; 16];
+                    let mut seed = [0; 16];
                     if randomize {
                         thread_rng().fill_bytes(&mut seed);
                     }
                     Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
                 }
                 pub fn new_with_seed(
                     graph: G,
                     target_heads: Vec<Revision>,
-                    seed: [u8; 16],
+                    seed: Seed,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     PartialDiscovery {
                         undecided: None,
                         children_cache: None,
                         target_heads: Some(target_heads),
                         graph: graph.clone(),
                         common: MissingAncestors::new(graph, vec![]),
                         missing: HashSet::new(),
                         rng: Rng::from_seed(seed),
                         respect_size: respect_size,
                         randomize: randomize,
                     }
                 }
                 /// Extract at most `size` random elements from sample and return them
                 /// as a vector
                 fn limit_sample(
                     &mut self,
                     mut sample: Vec<Revision>,
                     size: usize,
                 ) -> Vec<Revision> {
                     if !self.randomize {
                         sample.sort();
                         sample.truncate(size);
                         return sample;
                     }
                     let sample_len = sample.len();
                     if sample_len <= size {
                         return sample;
                     }
                     let rng = &mut self.rng;
                     let dropped_size = sample_len - size;
                     let limited_slice = if size < dropped_size {
                         sample.partial_shuffle(rng, size).0
                     } else {
                         sample.partial_shuffle(rng, dropped_size).1
                     };
                     limited_slice.to_owned()
                 }
                 /// Register revisions known as being common
                 pub fn add_common_revisions(
                     &mut self,
                     common: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let before_len = self.common.get_bases().len();
                     self.common.add_bases(common);
                     if self.common.get_bases().len() == before_len {
                         return Ok(());
                     }
                     if let Some(ref mut undecided) = self.undecided {
                         self.common.remove_ancestors_from(undecided)?;
                     }
                     Ok(())
                 }
                 /// Register revisions known as being missing
                 ///
                 /// # Performance note
                 ///
                 /// Except in the most trivial case, the first call of this method has
                 /// the side effect of computing `self.undecided` set for the first time,
                 /// and the related caches it might need for efficiency of its internal
                 /// computation. This is typically faster if more information is
                 /// available in `self.common`. Therefore, for good performance, the
                 /// caller should avoid calling this too early.
                 pub fn add_missing_revisions(
                     &mut self,
                     missing: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
                     if tovisit.is_empty() {
                         return Ok(());
                     }
                     self.ensure_children_cache()?;
                     self.ensure_undecided()?; // for safety of possible future refactors
                     let children = self.children_cache.as_ref().unwrap();
                     let mut seen: HashSet<Revision> = HashSet::new();
                     let undecided_mut = self.undecided.as_mut().unwrap();
                     while let Some(rev) = tovisit.pop_front() {
                         if !self.missing.insert(rev) {
                             // either it's known to be missing from a previous
                             // invocation, and there's no need to iterate on its
                             // children (we now they are all missing)
                             // or it's from a previous iteration of this loop
                             // and its children have already been queued
                             continue;
                         }
                         undecided_mut.remove(&rev);
                         match children.get(&rev) {
                             None => {
                                 continue;
                             }
                             Some(this_children) => {
                                 for child in this_children.iter().cloned() {
                                     if seen.insert(child) {
                                         tovisit.push_back(child);
                                     }
                                 }
                             }
                         }
                     }
                     Ok(())
                 }
                 /// Do we have any information about the peer?
                 pub fn has_info(&self) -> bool {
                     self.common.has_bases()
                 }
                 /// Did we acquire full knowledge of our Revisions that the peer has?
                 pub fn is_complete(&self) -> bool {
                     self.undecided.as_ref().map_or(false, |s| s.is_empty())
                 }
                 /// Return the heads of the currently known common set of revisions.
                 ///
                 /// If the discovery process is not complete (see `is_complete()`), the
                 /// caller must be aware that this is an intermediate state.
                 ///
                 /// On the other hand, if it is complete, then this is currently
                 /// the only way to retrieve the end results of the discovery process.
                 ///
                 /// We may introduce in the future an `into_common_heads` call that
                 /// would be more appropriate for normal Rust callers, dropping `self`
                 /// if it is complete.
                 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     self.common.bases_heads()
                 }
                 /// Force first computation of `self.undecided`
                 ///
                 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
                 /// unwrapped to get workable immutable or mutable references without
                 /// any panic.
                 ///
                 /// This is an imperative call instead of an access with added lazyness
                 /// to reduce easily the scope of mutable borrow for the caller,
                 /// compared to undecided(&'a mut self) -> &'a… that would keep it
                 /// as long as the resulting immutable one.
                 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
                     if self.undecided.is_some() {
                         return Ok(());
                     }
                     let tgt = self.target_heads.take().unwrap();
                     self.undecided =
                         Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
                     Ok(())
                 }
                 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
                     if self.children_cache.is_some() {
                         return Ok(());
                     }
                     self.ensure_undecided()?;
                     let mut children: FastHashMap<Revision, Vec<Revision>> =
                         FastHashMap::default();
                     for &rev in self.undecided.as_ref().unwrap() {
                         for p in ParentsIterator::graph_parents(&self.graph, rev)? {
                             children.entry(p).or_insert_with(|| Vec::new()).push(rev);
                         }
                     }
                     self.children_cache = Some(children);
                     Ok(())
                 }
                 /// Provide statistics about the current state of the discovery process
                 pub fn stats(&self) -> DiscoveryStats {
                     DiscoveryStats {
                         undecided: self.undecided.as_ref().map(|s| s.len()),
                     }
                 }
                 pub fn take_quick_sample(
                     &mut self,
                     headrevs: impl IntoIterator<Item = Revision>,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     self.ensure_undecided()?;
                     let mut sample = {
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok(undecided.iter().cloned().collect());
                         }
                         dagops::heads(&self.graph, undecided.iter())?
                     };
                     if sample.len() >= size {
                         return Ok(self.limit_sample(sample.into_iter().collect(), size));
                     }
                     update_sample(
                         None,
                         headrevs,
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         Some(size),
                     )?;
                     Ok(sample.into_iter().collect())
                 }
                 /// Extract a sample from `self.undecided`, going from its heads and roots.
                 ///
                 /// The `size` parameter is used to avoid useless computations if
                 /// it turns out to be bigger than the whole set of undecided Revisions.
                 ///
                 /// The sample is taken by using `update_sample` from the heads, then
                 /// from the roots, working on the reverse DAG,
                 /// expressed by `self.children_cache`.
                 ///
                 /// No effort is being made to complete or limit the sample to `size`
                 /// but this method returns another interesting size that it derives
                 /// from its knowledge of the structure of the various sets, leaving
                 /// to the caller the decision to use it or not.
                 fn bidirectional_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<(HashSet<Revision>, usize), GraphError> {
                     self.ensure_undecided()?;
                     {
                         // we don't want to compute children_cache before this
                         // but doing it after extracting self.undecided takes a mutable
                         // ref to self while a shareable one is still active.
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok((undecided.clone(), size));
                         }
                     }
                     self.ensure_children_cache()?;
                     let revs = self.undecided.as_ref().unwrap();
                     let mut sample: HashSet<Revision> = revs.clone();
                     // it's possible that leveraging the children cache would be more
                     // efficient here
                     dagops::retain_heads(&self.graph, &mut sample)?;
                     let revsheads = sample.clone(); // was again heads(revs) in python
                     // update from heads
                     update_sample(
                         Some(revs),
                         revsheads.iter().cloned(),
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         None,
                     )?;
                     // update from roots
                     let revroots: HashSet<Revision> =
                         dagops::roots(&self.graph, revs)?.into_iter().collect();
                     let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
                     let children = self.children_cache.as_ref().unwrap();
                     let empty_vec: Vec<Revision> = Vec::new();
                     update_sample(
                         Some(revs),
                         revroots,
                         &mut sample,
                         |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
                         None,
                     )?;
                     Ok((sample, prescribed_size))
                 }
                 /// Fill up sample up to the wished size with random undecided Revisions.
                 ///
                 /// This is intended to be used as a last resort completion if the
                 /// regular sampling algorithm returns too few elements.
                 fn random_complete_sample(
                     &mut self,
                     sample: &mut Vec<Revision>,
                     size: usize,
                 ) {
                     let sample_len = sample.len();
                     if size <= sample_len {
                         return;
                     }
                     let take_from: Vec<Revision> = self
                         .undecided
                         .as_ref()
                         .unwrap()
                         .iter()
                         .filter(|&r| !sample.contains(r))
                         .cloned()
                         .collect();
                     sample.extend(self.limit_sample(take_from, size - sample_len));
                 }
                 pub fn take_full_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
                     let size = if self.respect_size {
                         size
                     } else {
                         prescribed_size
                     };
                     let mut sample =
                         self.limit_sample(sample_set.into_iter().collect(), size);
                     self.random_complete_sample(&mut sample, size);
                     Ok(sample)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;
                 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
                 ///
                 /// To avoid actual randomness in these tests, we give it a fixed
                 /// random seed, but by default we'll test the random version.
                 fn full_disco() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![10, 11, 12, 13],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
                 ///
                 /// To avoid actual randomness in tests, we give it a fixed random seed.
                 fn disco12() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![12],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 fn sorted_undecided(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.undecided.as_ref().unwrap().iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.missing.iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_common_heads(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut as_vec: Vec<Revision> =
                         disco.common_heads()?.iter().cloned().collect();
                     as_vec.sort();
                     Ok(as_vec)
                 }
                 #[test]
                 fn test_add_common_get_undecided() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     assert_eq!(disco.undecided, None);
                     assert!(!disco.has_info());
                     assert_eq!(disco.stats().undecided, None);
                     disco.add_common_revisions(vec![11, 12])?;
                     assert!(disco.has_info());
                     assert!(!disco.is_complete());
                     assert!(disco.missing.is_empty());
                     // add_common_revisions did not trigger a premature computation
                     // of `undecided`, let's check that and ask for them
                     assert_eq!(disco.undecided, None);
                     disco.ensure_undecided()?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
                     assert_eq!(disco.stats().undecided, Some(4));
                     Ok(())
                 }
                 /// in this test, we pretend that our peer misses exactly (8+10)::
                 /// and we're comparing all our repo to it (as in a bare push)
                 #[test]
                 fn test_discovery() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![11, 12])?;
                     disco.add_missing_revisions(vec![8, 10])?;
                     assert_eq!(sorted_undecided(&disco), vec![5]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(!disco.is_complete());
                     disco.add_common_revisions(vec![5])?;
                     assert_eq!(sorted_undecided(&disco), vec![]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(disco.is_complete());
                     assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_add_missing_early_continue() -> Result<(), GraphError> {
                     eprintln!("test_add_missing_early_stop");
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![13, 3, 4])?;
                     disco.ensure_children_cache()?;
                     // 12 is grand-child of 6 through 9
                     // passing them in this order maximizes the chances of the
                     // early continue to do the wrong thing
                     disco.add_missing_revisions(vec![6, 9, 12])?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
                     assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
                     assert!(!disco.is_complete());
                     Ok(())
                 }
                 #[test]
                 fn test_limit_sample_no_need_to() {
                     let sample = vec![1, 2, 3, 4];
                     assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
                 }
                 #[test]
                 fn test_limit_sample_less_than_half() {
                     assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
                 }
                 #[test]
                 fn test_limit_sample_more_than_half() {
                     assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
                 }
                 #[test]
                 fn test_limit_sample_no_random() {
                     let mut disco = full_disco();
                     disco.randomize = false;
                     assert_eq!(
                         disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
                         vec![1, 3, 5, 7]
                     );
                 }
                 #[test]
                 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((1..=13).collect());
                     let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
                     sample_vec.sort();
                     assert_eq!(sample_vec, vec![10, 11, 12, 13]);
                     Ok(())
                 }
                 #[test]
                 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
                     let mut disco = disco12();
                     disco.ensure_undecided()?;
                     let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
                     sample_vec.sort();
                     // r12's only parent is r9, whose unique grand-parent through the
                     // diamond shape is r4. This ends there because the distance from r4
                     // to the root is only 3.
                     assert_eq!(sample_vec, vec![4, 9, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_children_cache() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.ensure_children_cache()?;
                     let cache = disco.children_cache.unwrap();
                     assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
                     assert_eq!(cache.get(&10).cloned(), None);
                     let mut children_4 = cache.get(&4).cloned().unwrap();
                     children_4.sort();
                     assert_eq!(children_4, vec![5, 6, 7]);
                     let mut children_7 = cache.get(&7).cloned().unwrap();
                     children_7.sort();
                     assert_eq!(children_7, vec![9, 11]);
                     Ok(())
                 }
                 #[test]
                 fn test_complete_sample() {
                     let mut disco = full_disco();
                     let undecided: HashSet<Revision> =
                         [4, 7, 9, 2, 3].iter().cloned().collect();
                     disco.undecided = Some(undecided);
                     let mut sample = vec![0];
                     disco.random_complete_sample(&mut sample, 3);
                     assert_eq!(sample.len(), 3);
                     let mut sample = vec![2, 4, 7];
                     disco.random_complete_sample(&mut sample, 1);
                     assert_eq!(sample.len(), 3);
                 }
                 #[test]
                 fn test_bidirectional_sample() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((0..=13).into_iter().collect());
                     let (sample_set, size) = disco.bidirectional_sample(7)?;
                     assert_eq!(size, 7);
                     let mut sample: Vec<Revision> = sample_set.into_iter().collect();
                     sample.sort();
                     // our DAG is a bit too small for the results to be really interesting
                     // at least it shows that
                     // - we went both ways
                     // - we didn't take all Revisions (6 is not in the sample)
                     assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
                     Ok(())
                 }
             }