upstream/mercurial-mirror Commit - r43009:88d6a6f7

1

// discovery.rs

1

// discovery.rs

2

//

2

//

3

4

//

4

//

5

// This software may be used and distributed according to the terms of the

5

// This software may be used and distributed according to the terms of the

6

// GNU General Public License version 2 or any later version.

6

// GNU General Public License version 2 or any later version.

7

8

//! Discovery operations

8

//! Discovery operations

9

//!

9

//!

10

//! This is a Rust counterpart to the `partialdiscovery` class of

10

//! This is a Rust counterpart to the `partialdiscovery` class of

11

//! `mercurial.setdiscovery`

11

//! `mercurial.setdiscovery`

12

13

use super::{Graph, GraphError, Revision, NULL_REVISION};

13

use super::{Graph, GraphError, Revision, NULL_REVISION};

14

use crate::ancestors::MissingAncestors;

14

use crate::ancestors::MissingAncestors;

15

use crate::dagops;

15

use crate::dagops;

16

use rand::seq::SliceRandom;

16

use rand::seq::SliceRandom;

17

use rand::{thread_rng, RngCore, SeedableRng};

17

use rand::{thread_rng, RngCore, SeedableRng};

18

use std::cmp::{max, min};

18

use std::cmp::{max, min};

19

use std::collections::{HashMap, HashSet, VecDeque};

19

use std::collections::{HashMap, HashSet, VecDeque};

20

21

type Rng = rand_pcg::Pcg32;

21

type Rng = rand_pcg::Pcg32;

22

23

pub struct PartialDiscovery<G: Graph + Clone> {

23

pub struct PartialDiscovery<G: Graph + Clone> {

24

target_heads: Option<Vec<Revision>>,

24

target_heads: Option<Vec<Revision>>,

25

graph: G, // plays the role of self._repo

25

graph: G, // plays the role of self._repo

26

common: MissingAncestors<G>,

26

common: MissingAncestors<G>,

27

undecided: Option<HashSet<Revision>>,

27

undecided: Option<HashSet<Revision>>,

28

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

28

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

29

missing: HashSet<Revision>,

29

missing: HashSet<Revision>,

30

rng: Rng,

30

rng: Rng,

31

respect_size: bool,

31

respect_size: bool,

32

randomize: bool,

32

randomize: bool,

33

}

33

}

34

35

pub struct DiscoveryStats {

35

pub struct DiscoveryStats {

36

pub undecided: Option<usize>,

36

pub undecided: Option<usize>,

37

}

37

}

38

39

/// Update an existing sample to match the expected size

39

/// Update an existing sample to match the expected size

40

///

40

///

41

/// The sample is updated with revisions exponentially distant from each

41

/// The sample is updated with revisions exponentially distant from each

42

/// element of `heads`.

42

/// element of `heads`.

43

///

43

///

44

/// If a target size is specified, the sampling will stop once this size is

44

/// If a target size is specified, the sampling will stop once this size is

45

/// reached. Otherwise sampling will happen until roots of the <revs> set are

45

/// reached. Otherwise sampling will happen until roots of the <revs> set are

46

/// reached.

46

/// reached.

47

///

47

///

48

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

48

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

49

/// represented by `parentfn`

49

/// represented by `parentfn`

50

/// - `heads`: set of DAG head revs

50

/// - `heads`: set of DAG head revs

51

/// - `sample`: a sample to update

51

/// - `sample`: a sample to update

52

/// - `parentfn`: a callable to resolve parents for a revision

52

/// - `parentfn`: a callable to resolve parents for a revision

53

/// - `quicksamplesize`: optional target size of the sample

53

/// - `quicksamplesize`: optional target size of the sample

54

fn update_sample<I>(

54

fn update_sample<I>(

55

revs: Option<&HashSet<Revision>>,

55

revs: Option<&HashSet<Revision>>,

56

heads: impl IntoIterator<Item = Revision>,

56

heads: impl IntoIterator<Item = Revision>,

57

sample: &mut HashSet<Revision>,

57

sample: &mut HashSet<Revision>,

58

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

58

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

59

quicksamplesize: Option<usize>,

59

quicksamplesize: Option<usize>,

60

) -> Result<(), GraphError>

60

) -> Result<(), GraphError>

61

where

61

where

62

I: Iterator<Item = Revision>,

62

I: Iterator<Item = Revision>,

63

{

63

{

64

let mut distances: HashMap<Revision, u32> = HashMap::new();

64

let mut distances: HashMap<Revision, u32> = HashMap::new();

65

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

65

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

66

let mut factor: u32 = 1;

66

let mut factor: u32 = 1;

67

let mut seen: HashSet<Revision> = HashSet::new();

67

let mut seen: HashSet<Revision> = HashSet::new();

68

loop {

68

while let Some(current) = visit.pop_front() {

69

let current = match visit.pop_front() {

70

None => {

71

break;

72

}

73

Some(r) => r,

74

};

75

if !seen.insert(current) {

69

if !seen.insert(current) {

76

continue;

70

continue;

77

}

71

}

78

72

79

let d = *distances.entry(current).or_insert(1);

73

let d = *distances.entry(current).or_insert(1);

80

if d > factor {

74

if d > factor {

81

factor *= 2;

75

factor *= 2;

82

}

76

}

83

if d == factor {

77

if d == factor {

84

sample.insert(current);

78

sample.insert(current);

85

if let Some(sz) = quicksamplesize {

79

if let Some(sz) = quicksamplesize {

86

if sample.len() >= sz {

80

if sample.len() >= sz {

87

return Ok(());

81

return Ok(());

88

}

82

}

89

}

83

}

90

}

84

}

91

for p in parentsfn(current)? {

85

for p in parentsfn(current)? {

92

if let Some(revs) = revs {

86

if let Some(revs) = revs {

93

if !revs.contains(&p) {

87

if !revs.contains(&p) {

94

continue;

88

continue;

95

}

89

}

96

}

90

}

97

distances.entry(p).or_insert(d + 1);

91

distances.entry(p).or_insert(d + 1);

98

visit.push_back(p);

92

visit.push_back(p);

99

}

93

}

100

}

94

}

101

Ok(())

95

Ok(())

102

}

96

}

103

97

104

struct ParentsIterator {

98

struct ParentsIterator {

105

parents: [Revision; 2],

99

parents: [Revision; 2],

106

cur: usize,

100

cur: usize,

107

}

101

}

108

102

109

impl ParentsIterator {

103

impl ParentsIterator {

110

fn graph_parents(

104

fn graph_parents(

111

graph: &impl Graph,

105

graph: &impl Graph,

112

r: Revision,

106

r: Revision,

113

) -> Result<ParentsIterator, GraphError> {

107

) -> Result<ParentsIterator, GraphError> {

114

Ok(ParentsIterator {

108

Ok(ParentsIterator {

115

parents: graph.parents(r)?,

109

parents: graph.parents(r)?,

116

cur: 0,

110

cur: 0,

117

})

111

})

118

}

112

}

119

}

113

}

120

114

121

impl Iterator for ParentsIterator {

115

impl Iterator for ParentsIterator {

122

type Item = Revision;

116

type Item = Revision;

123

117

124

fn next(&mut self) -> Option<Revision> {

118

fn next(&mut self) -> Option<Revision> {

125

if self.cur > 1 {

119

if self.cur > 1 {

126

return None;

120

return None;

127

}

121

}

128

let rev = self.parents[self.cur];

122

let rev = self.parents[self.cur];

129

self.cur += 1;

123

self.cur += 1;

130

if rev == NULL_REVISION {

124

if rev == NULL_REVISION {

131

return self.next();

125

return self.next();

132

}

126

}

133

Some(rev)

127

Some(rev)

134

}

128

}

135

}

129

}

136

130

137

impl<G: Graph + Clone> PartialDiscovery<G> {

131

impl<G: Graph + Clone> PartialDiscovery<G> {

138

/// Create a PartialDiscovery object, with the intent

132

/// Create a PartialDiscovery object, with the intent

139

/// of comparing our `::<target_heads>` revset to the contents of another

133

/// of comparing our `::<target_heads>` revset to the contents of another

140

/// repo.

134

/// repo.

141

///

135

///

142

/// For now `target_heads` is passed as a vector, and will be used

136

/// For now `target_heads` is passed as a vector, and will be used

143

/// at the first call to `ensure_undecided()`.

137

/// at the first call to `ensure_undecided()`.

144

///

138

///

145

/// If we want to make the signature more flexible,

139

/// If we want to make the signature more flexible,

146

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

140

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

147

/// object since we'll keep it in the meanwhile

141

/// object since we'll keep it in the meanwhile

148

///

142

///

149

/// The `respect_size` boolean controls how the sampling methods

143

/// The `respect_size` boolean controls how the sampling methods

150

/// will interpret the size argument requested by the caller. If it's

144

/// will interpret the size argument requested by the caller. If it's

151

/// `false`, they are allowed to produce a sample whose size is more

145

/// `false`, they are allowed to produce a sample whose size is more

152

/// appropriate to the situation (typically bigger).

146

/// appropriate to the situation (typically bigger).

153

///

147

///

154

/// The `randomize` boolean affects sampling, and specifically how

148

/// The `randomize` boolean affects sampling, and specifically how

155

/// limiting or last-minute expanding is been done:

149

/// limiting or last-minute expanding is been done:

156

///

150

///

157

/// If `true`, both will perform random picking from `self.undecided`.

151

/// If `true`, both will perform random picking from `self.undecided`.

158

/// This is currently the best for actual discoveries.

152

/// This is currently the best for actual discoveries.

159

///

153

///

160

/// If `false`, a reproductible picking strategy is performed. This is

154

/// If `false`, a reproductible picking strategy is performed. This is

161

/// useful for integration tests.

155

/// useful for integration tests.

162

pub fn new(

156

pub fn new(

163

graph: G,

157

graph: G,

164

target_heads: Vec<Revision>,

158

target_heads: Vec<Revision>,

165

respect_size: bool,

159

respect_size: bool,

166

randomize: bool,

160

randomize: bool,

167

) -> Self {

161

) -> Self {

168

let mut seed: [u8; 16] = [0; 16];

162

let mut seed: [u8; 16] = [0; 16];

169

if randomize {

163

if randomize {

170

thread_rng().fill_bytes(&mut seed);

164

thread_rng().fill_bytes(&mut seed);

171

}

165

}

172

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

166

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

173

}

167

}

174

168

175

pub fn new_with_seed(

169

pub fn new_with_seed(

176

graph: G,

170

graph: G,

177

target_heads: Vec<Revision>,

171

target_heads: Vec<Revision>,

178

seed: [u8; 16],

172

seed: [u8; 16],

179

respect_size: bool,

173

respect_size: bool,

180

randomize: bool,

174

randomize: bool,

181

) -> Self {

175

) -> Self {

182

PartialDiscovery {

176

PartialDiscovery {

183

undecided: None,

177

undecided: None,

184

children_cache: None,

178

children_cache: None,

185

target_heads: Some(target_heads),

179

target_heads: Some(target_heads),

186

graph: graph.clone(),

180

graph: graph.clone(),

187

common: MissingAncestors::new(graph, vec![]),

181

common: MissingAncestors::new(graph, vec![]),

188

missing: HashSet::new(),

182

missing: HashSet::new(),

189

rng: Rng::from_seed(seed),

183

rng: Rng::from_seed(seed),

190

respect_size: respect_size,

184

respect_size: respect_size,

191

randomize: randomize,

185

randomize: randomize,

192

}

186

}

193

}

187

}

194

188

195

/// Extract at most `size` random elements from sample and return them

189

/// Extract at most `size` random elements from sample and return them

196

/// as a vector

190

/// as a vector

197

fn limit_sample(

191

fn limit_sample(

198

&mut self,

192

&mut self,

199

mut sample: Vec<Revision>,

193

mut sample: Vec<Revision>,

200

size: usize,

194

size: usize,

201

) -> Vec<Revision> {

195

) -> Vec<Revision> {

202

if !self.randomize {

196

if !self.randomize {

203

sample.sort();

197

sample.sort();

204

sample.truncate(size);

198

sample.truncate(size);

205

return sample;

199

return sample;

206

}

200

}

207

let sample_len = sample.len();

201

let sample_len = sample.len();

208

if sample_len <= size {

202

if sample_len <= size {

209

return sample;

203

return sample;

210

}

204

}

211

let rng = &mut self.rng;

205

let rng = &mut self.rng;

212

let dropped_size = sample_len - size;

206

let dropped_size = sample_len - size;

213

let limited_slice = if size < dropped_size {

207

let limited_slice = if size < dropped_size {

214

sample.partial_shuffle(rng, size).0

208

sample.partial_shuffle(rng, size).0

215

} else {

209

} else {

216

sample.partial_shuffle(rng, dropped_size).1

210

sample.partial_shuffle(rng, dropped_size).1

217

};

211

};

218

limited_slice.to_owned()

212

limited_slice.to_owned()

219

}

213

}

220

214

221

/// Register revisions known as being common

215

/// Register revisions known as being common

222

pub fn add_common_revisions(

216

pub fn add_common_revisions(

223

&mut self,

217

&mut self,

224

common: impl IntoIterator<Item = Revision>,

218

common: impl IntoIterator<Item = Revision>,

225

) -> Result<(), GraphError> {

219

) -> Result<(), GraphError> {

226

let before_len = self.common.get_bases().len();

220

let before_len = self.common.get_bases().len();

227

self.common.add_bases(common);

221

self.common.add_bases(common);

228

if self.common.get_bases().len() == before_len {

222

if self.common.get_bases().len() == before_len {

229

return Ok(());

223

return Ok(());

230

}

224

}

231

if let Some(ref mut undecided) = self.undecided {

225

if let Some(ref mut undecided) = self.undecided {

232

self.common.remove_ancestors_from(undecided)?;

226

self.common.remove_ancestors_from(undecided)?;

233

}

227

}

234

Ok(())

228

Ok(())

235

}

229

}

236

230

237

/// Register revisions known as being missing

231

/// Register revisions known as being missing

238

///

232

///

239

/// # Performance note

233

/// # Performance note

240

///

234

///

241

/// Except in the most trivial case, the first call of this method has

235

/// Except in the most trivial case, the first call of this method has

242

/// the side effect of computing `self.undecided` set for the first time,

236

/// the side effect of computing `self.undecided` set for the first time,

243

/// and the related caches it might need for efficiency of its internal

237

/// and the related caches it might need for efficiency of its internal

244

/// computation. This is typically faster if more information is

238

/// computation. This is typically faster if more information is

245

/// available in `self.common`. Therefore, for good performance, the

239

/// available in `self.common`. Therefore, for good performance, the

246

/// caller should avoid calling this too early.

240

/// caller should avoid calling this too early.

247

pub fn add_missing_revisions(

241

pub fn add_missing_revisions(

248

&mut self,

242

&mut self,

249

missing: impl IntoIterator<Item = Revision>,

243

missing: impl IntoIterator<Item = Revision>,

250

) -> Result<(), GraphError> {

244

) -> Result<(), GraphError> {

251

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

245

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

252

if tovisit.is_empty() {

246

if tovisit.is_empty() {

253

return Ok(());

247

return Ok(());

254

}

248

}

255

self.ensure_children_cache()?;

249

self.ensure_children_cache()?;

256

self.ensure_undecided()?; // for safety of possible future refactors

250

self.ensure_undecided()?; // for safety of possible future refactors

257

let children = self.children_cache.as_ref().unwrap();

251

let children = self.children_cache.as_ref().unwrap();

258

let mut seen: HashSet<Revision> = HashSet::new();

252

let mut seen: HashSet<Revision> = HashSet::new();

259

let undecided_mut = self.undecided.as_mut().unwrap();

253

let undecided_mut = self.undecided.as_mut().unwrap();

260

while let Some(rev) = tovisit.pop_front() {

254

while let Some(rev) = tovisit.pop_front() {

261

if !self.missing.insert(rev) {

255

if !self.missing.insert(rev) {

262

// either it's known to be missing from a previous

256

// either it's known to be missing from a previous

263

// invocation, and there's no need to iterate on its

257

// invocation, and there's no need to iterate on its

264

// children (we now they are all missing)

258

// children (we now they are all missing)

265

// or it's from a previous iteration of this loop

259

// or it's from a previous iteration of this loop

266

// and its children have already been queued

260

// and its children have already been queued

267

continue;

261

continue;

268

}

262

}

269

undecided_mut.remove(&rev);

263

undecided_mut.remove(&rev);

270

match children.get(&rev) {

264

match children.get(&rev) {

271

None => {

265

None => {

272

continue;

266

continue;

273

}

267

}

274

Some(this_children) => {

268

Some(this_children) => {

275

for child in this_children.iter().cloned() {

269

for child in this_children.iter().cloned() {

276

if seen.insert(child) {

270

if seen.insert(child) {

277

tovisit.push_back(child);

271

tovisit.push_back(child);

278

}

272

}

279

}

273

}

280

}

274

}

281

}

275

}

282

}

276

}

283

Ok(())

277

Ok(())

284

}

278

}

285

279

286

/// Do we have any information about the peer?

280

/// Do we have any information about the peer?

287

pub fn has_info(&self) -> bool {

281

pub fn has_info(&self) -> bool {

288

self.common.has_bases()

282

self.common.has_bases()

289

}

283

}

290

284

291

/// Did we acquire full knowledge of our Revisions that the peer has?

285

/// Did we acquire full knowledge of our Revisions that the peer has?

292

pub fn is_complete(&self) -> bool {

286

pub fn is_complete(&self) -> bool {

293

self.undecided.as_ref().map_or(false, |s| s.is_empty())

287

self.undecided.as_ref().map_or(false, |s| s.is_empty())

294

}

288

}

295

289

296

/// Return the heads of the currently known common set of revisions.

290

/// Return the heads of the currently known common set of revisions.

297

///

291

///

298

/// If the discovery process is not complete (see `is_complete()`), the

292

/// If the discovery process is not complete (see `is_complete()`), the

299

/// caller must be aware that this is an intermediate state.

293

/// caller must be aware that this is an intermediate state.

300

///

294

///

301

/// On the other hand, if it is complete, then this is currently

295

/// On the other hand, if it is complete, then this is currently

302

/// the only way to retrieve the end results of the discovery process.

296

/// the only way to retrieve the end results of the discovery process.

303

///

297

///

304

/// We may introduce in the future an `into_common_heads` call that

298

/// We may introduce in the future an `into_common_heads` call that

305

/// would be more appropriate for normal Rust callers, dropping `self`

299

/// would be more appropriate for normal Rust callers, dropping `self`

306

/// if it is complete.

300

/// if it is complete.

307

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

301

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

308

self.common.bases_heads()

302

self.common.bases_heads()

309

}

303

}

310

304

311

/// Force first computation of `self.undecided`

305

/// Force first computation of `self.undecided`

312

///

306

///

313

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

307

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

314

/// unwrapped to get workable immutable or mutable references without

308

/// unwrapped to get workable immutable or mutable references without

315

/// any panic.

309

/// any panic.

316

///

310

///

317

/// This is an imperative call instead of an access with added lazyness

311

/// This is an imperative call instead of an access with added lazyness

318

/// to reduce easily the scope of mutable borrow for the caller,

312

/// to reduce easily the scope of mutable borrow for the caller,

319

/// compared to undecided(&'a mut self) -> &'a… that would keep it

313

/// compared to undecided(&'a mut self) -> &'a… that would keep it

320

/// as long as the resulting immutable one.

314

/// as long as the resulting immutable one.

321

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

315

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

322

if self.undecided.is_some() {

316

if self.undecided.is_some() {

323

return Ok(());

317

return Ok(());

324

}

318

}

325

let tgt = self.target_heads.take().unwrap();

319

let tgt = self.target_heads.take().unwrap();

326

self.undecided =

320

self.undecided =

327

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

321

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

328

Ok(())

322

Ok(())

329

}

323

}

330

324

331

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

325

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

332

if self.children_cache.is_some() {

326

if self.children_cache.is_some() {

333

return Ok(());

327

return Ok(());

334

}

328

}

335

self.ensure_undecided()?;

329

self.ensure_undecided()?;

336

330

337

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

331

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

338

for &rev in self.undecided.as_ref().unwrap() {

332

for &rev in self.undecided.as_ref().unwrap() {

339

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

333

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

340

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

334

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

341

}

335

}

342

}

336

}

343

self.children_cache = Some(children);

337

self.children_cache = Some(children);

344

Ok(())

338

Ok(())

345

}

339

}

346

340

347

/// Provide statistics about the current state of the discovery process

341

/// Provide statistics about the current state of the discovery process

348

pub fn stats(&self) -> DiscoveryStats {

342

pub fn stats(&self) -> DiscoveryStats {

349

DiscoveryStats {

343

DiscoveryStats {

350

undecided: self.undecided.as_ref().map(|s| s.len()),

344

undecided: self.undecided.as_ref().map(|s| s.len()),

351

}

345

}

352

}

346

}

353

347

354

pub fn take_quick_sample(

348

pub fn take_quick_sample(

355

&mut self,

349

&mut self,

356

headrevs: impl IntoIterator<Item = Revision>,

350

headrevs: impl IntoIterator<Item = Revision>,

357

size: usize,

351

size: usize,

358

) -> Result<Vec<Revision>, GraphError> {

352

) -> Result<Vec<Revision>, GraphError> {

359

self.ensure_undecided()?;

353

self.ensure_undecided()?;

360

let mut sample = {

354

let mut sample = {

361

let undecided = self.undecided.as_ref().unwrap();

355

let undecided = self.undecided.as_ref().unwrap();

362

if undecided.len() <= size {

356

if undecided.len() <= size {

363

return Ok(undecided.iter().cloned().collect());

357

return Ok(undecided.iter().cloned().collect());

364

}

358

}

365

dagops::heads(&self.graph, undecided.iter())?

359

dagops::heads(&self.graph, undecided.iter())?

366

};

360

};

367

if sample.len() >= size {

361

if sample.len() >= size {

368

return Ok(self.limit_sample(sample.into_iter().collect(), size));

362

return Ok(self.limit_sample(sample.into_iter().collect(), size));

369

}

363

}

370

update_sample(

364

update_sample(

371

None,

365

None,

372

headrevs,

366

headrevs,

373

&mut sample,

367

&mut sample,

374

|r| ParentsIterator::graph_parents(&self.graph, r),

368

|r| ParentsIterator::graph_parents(&self.graph, r),

375

Some(size),

369

Some(size),

376

)?;

370

)?;

377

Ok(sample.into_iter().collect())

371

Ok(sample.into_iter().collect())

378

}

372

}

379

373

380

/// Extract a sample from `self.undecided`, going from its heads and roots.

374

/// Extract a sample from `self.undecided`, going from its heads and roots.

381

///

375

///

382

/// The `size` parameter is used to avoid useless computations if

376

/// The `size` parameter is used to avoid useless computations if

383

/// it turns out to be bigger than the whole set of undecided Revisions.

377

/// it turns out to be bigger than the whole set of undecided Revisions.

384

///

378

///

385

/// The sample is taken by using `update_sample` from the heads, then

379

/// The sample is taken by using `update_sample` from the heads, then

386

/// from the roots, working on the reverse DAG,

380

/// from the roots, working on the reverse DAG,

387

/// expressed by `self.children_cache`.

381

/// expressed by `self.children_cache`.

388

///

382

///

389

/// No effort is being made to complete or limit the sample to `size`

383

/// No effort is being made to complete or limit the sample to `size`

390

/// but this method returns another interesting size that it derives

384

/// but this method returns another interesting size that it derives

391

/// from its knowledge of the structure of the various sets, leaving

385

/// from its knowledge of the structure of the various sets, leaving

392

/// to the caller the decision to use it or not.

386

/// to the caller the decision to use it or not.

393

fn bidirectional_sample(

387

fn bidirectional_sample(

394

&mut self,

388

&mut self,

395

size: usize,

389

size: usize,

396

) -> Result<(HashSet<Revision>, usize), GraphError> {

390

) -> Result<(HashSet<Revision>, usize), GraphError> {

397

self.ensure_undecided()?;

391

self.ensure_undecided()?;

398

{

392

{

399

// we don't want to compute children_cache before this

393

// we don't want to compute children_cache before this

400

// but doing it after extracting self.undecided takes a mutable

394

// but doing it after extracting self.undecided takes a mutable

401

// ref to self while a shareable one is still active.

395

// ref to self while a shareable one is still active.

402

let undecided = self.undecided.as_ref().unwrap();

396

let undecided = self.undecided.as_ref().unwrap();

403

if undecided.len() <= size {

397

if undecided.len() <= size {

404

return Ok((undecided.clone(), size));

398

return Ok((undecided.clone(), size));

405

}

399

}

406

}

400

}

407

401

408

self.ensure_children_cache()?;

402

self.ensure_children_cache()?;

409

let revs = self.undecided.as_ref().unwrap();

403

let revs = self.undecided.as_ref().unwrap();

410

let mut sample: HashSet<Revision> = revs.clone();

404

let mut sample: HashSet<Revision> = revs.clone();

411

405

412

// it's possible that leveraging the children cache would be more

406

// it's possible that leveraging the children cache would be more

413

// efficient here

407

// efficient here

414

dagops::retain_heads(&self.graph, &mut sample)?;

408

dagops::retain_heads(&self.graph, &mut sample)?;

415

let revsheads = sample.clone(); // was again heads(revs) in python

409

let revsheads = sample.clone(); // was again heads(revs) in python

416

410

417

// update from heads

411

// update from heads

418

update_sample(

412

update_sample(

419

Some(revs),

413

Some(revs),

420

revsheads.iter().cloned(),

414

revsheads.iter().cloned(),

421

&mut sample,

415

&mut sample,

422

|r| ParentsIterator::graph_parents(&self.graph, r),

416

|r| ParentsIterator::graph_parents(&self.graph, r),

423

None,

417

None,

424

)?;

418

)?;

425

419

426

// update from roots

420

// update from roots

427

let revroots: HashSet<Revision> =

421

let revroots: HashSet<Revision> =

428

dagops::roots(&self.graph, revs)?.into_iter().collect();

422

dagops::roots(&self.graph, revs)?.into_iter().collect();

429

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

423

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

430

424

431

let children = self.children_cache.as_ref().unwrap();

425

let children = self.children_cache.as_ref().unwrap();

432

let empty_vec: Vec<Revision> = Vec::new();

426

let empty_vec: Vec<Revision> = Vec::new();

433

update_sample(

427

update_sample(

434

Some(revs),

428

Some(revs),

435

revroots,

429

revroots,

436

&mut sample,

430

&mut sample,

437

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

431

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

438

None,

432

None,

439

)?;

433

)?;

440

Ok((sample, prescribed_size))

434

Ok((sample, prescribed_size))

441

}

435

}

442

436

443

/// Fill up sample up to the wished size with random undecided Revisions.

437

/// Fill up sample up to the wished size with random undecided Revisions.

444

///

438

///

445

/// This is intended to be used as a last resort completion if the

439

/// This is intended to be used as a last resort completion if the

446

/// regular sampling algorithm returns too few elements.

440

/// regular sampling algorithm returns too few elements.

447

fn random_complete_sample(

441

fn random_complete_sample(

448

&mut self,

442

&mut self,

449

sample: &mut Vec<Revision>,

443

sample: &mut Vec<Revision>,

450

size: usize,

444

size: usize,

451

) {

445

) {

452

let sample_len = sample.len();

446

let sample_len = sample.len();

453

if size <= sample_len {

447

if size <= sample_len {

454

return;

448

return;

455

}

449

}

456

let take_from: Vec<Revision> = self

450

let take_from: Vec<Revision> = self

457

.undecided

451

.undecided

458

.as_ref()

452

.as_ref()

459

.unwrap()

453

.unwrap()

460

.iter()

454

.iter()

461

.filter(|&r| !sample.contains(r))

455

.filter(|&r| !sample.contains(r))

462

.cloned()

456

.cloned()

463

.collect();

457

.collect();

464

sample.extend(self.limit_sample(take_from, size - sample_len));

458

sample.extend(self.limit_sample(take_from, size - sample_len));

465

}

459

}

466

460

467

pub fn take_full_sample(

461

pub fn take_full_sample(

468

&mut self,

462

&mut self,

469

size: usize,

463

size: usize,

470

) -> Result<Vec<Revision>, GraphError> {

464

) -> Result<Vec<Revision>, GraphError> {

471

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

465

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

472

let size = if self.respect_size {

466

let size = if self.respect_size {

473

size

467

size

474

} else {

468

} else {

475

prescribed_size

469

prescribed_size

476

};

470

};

477

let mut sample =

471

let mut sample =

478

self.limit_sample(sample_set.into_iter().collect(), size);

472

self.limit_sample(sample_set.into_iter().collect(), size);

479

self.random_complete_sample(&mut sample, size);

473

self.random_complete_sample(&mut sample, size);

480

Ok(sample)

474

Ok(sample)

481

}

475

}

482

}

476

}

483

477

484

#[cfg(test)]

478

#[cfg(test)]

485

mod tests {

479

mod tests {

486

use super::*;

480

use super::*;

487

use crate::testing::SampleGraph;

481

use crate::testing::SampleGraph;

488

482

489

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

483

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

490

///

484

///

491

/// To avoid actual randomness in these tests, we give it a fixed

485

/// To avoid actual randomness in these tests, we give it a fixed

492

/// random seed, but by default we'll test the random version.

486

/// random seed, but by default we'll test the random version.

493

fn full_disco() -> PartialDiscovery<SampleGraph> {

487

fn full_disco() -> PartialDiscovery<SampleGraph> {

494

PartialDiscovery::new_with_seed(

488

PartialDiscovery::new_with_seed(

495

SampleGraph,

489

SampleGraph,

496

vec![10, 11, 12, 13],

490

vec![10, 11, 12, 13],

497

[0; 16],

491

[0; 16],

498

true,

492

true,

499

true,

493

true,

500

)

494

)

501

}

495

}

502

496

503

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

497

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

504

///

498

///

505

/// To avoid actual randomness in tests, we give it a fixed random seed.

499

/// To avoid actual randomness in tests, we give it a fixed random seed.

506

fn disco12() -> PartialDiscovery<SampleGraph> {

500

fn disco12() -> PartialDiscovery<SampleGraph> {

507

PartialDiscovery::new_with_seed(

501

PartialDiscovery::new_with_seed(

508

SampleGraph,

502

SampleGraph,

509

vec![12],

503

vec![12],

510

[0; 16],

504

[0; 16],

511

true,

505

true,

512

true,

506

true,

513

)

507

)

514

}

508

}

515

509

516

fn sorted_undecided(

510

fn sorted_undecided(

517

disco: &PartialDiscovery<SampleGraph>,

511

disco: &PartialDiscovery<SampleGraph>,

518

) -> Vec<Revision> {

512

) -> Vec<Revision> {

519

let mut as_vec: Vec<Revision> =

513

let mut as_vec: Vec<Revision> =

520

disco.undecided.as_ref().unwrap().iter().cloned().collect();

514

disco.undecided.as_ref().unwrap().iter().cloned().collect();

521

as_vec.sort();

515

as_vec.sort();

522

as_vec

516

as_vec

523

}

517

}

524

518

525

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

519

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

526

let mut as_vec: Vec<Revision> =

520

let mut as_vec: Vec<Revision> =

527

disco.missing.iter().cloned().collect();

521

disco.missing.iter().cloned().collect();

528

as_vec.sort();

522

as_vec.sort();

529

as_vec

523

as_vec

530

}

524

}

531

525

532

fn sorted_common_heads(

526

fn sorted_common_heads(

533

disco: &PartialDiscovery<SampleGraph>,

527

disco: &PartialDiscovery<SampleGraph>,

534

) -> Result<Vec<Revision>, GraphError> {

528

) -> Result<Vec<Revision>, GraphError> {

535

let mut as_vec: Vec<Revision> =

529

let mut as_vec: Vec<Revision> =

536

disco.common_heads()?.iter().cloned().collect();

530

disco.common_heads()?.iter().cloned().collect();

537

as_vec.sort();

531

as_vec.sort();

538

Ok(as_vec)

532

Ok(as_vec)

539

}

533

}

540

534

541

#[test]

535

#[test]

542

fn test_add_common_get_undecided() -> Result<(), GraphError> {

536

fn test_add_common_get_undecided() -> Result<(), GraphError> {

543

let mut disco = full_disco();

537

let mut disco = full_disco();

544

assert_eq!(disco.undecided, None);

538

assert_eq!(disco.undecided, None);

545

assert!(!disco.has_info());

539

assert!(!disco.has_info());

546

assert_eq!(disco.stats().undecided, None);

540

assert_eq!(disco.stats().undecided, None);

547

541

548

disco.add_common_revisions(vec![11, 12])?;

542

disco.add_common_revisions(vec![11, 12])?;

549

assert!(disco.has_info());

543

assert!(disco.has_info());

550

assert!(!disco.is_complete());

544

assert!(!disco.is_complete());

551

assert!(disco.missing.is_empty());

545

assert!(disco.missing.is_empty());

552

546

553

// add_common_revisions did not trigger a premature computation

547

// add_common_revisions did not trigger a premature computation

554

// of `undecided`, let's check that and ask for them

548

// of `undecided`, let's check that and ask for them

555

assert_eq!(disco.undecided, None);

549

assert_eq!(disco.undecided, None);

556

disco.ensure_undecided()?;

550

disco.ensure_undecided()?;

557

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

551

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

558

assert_eq!(disco.stats().undecided, Some(4));

552

assert_eq!(disco.stats().undecided, Some(4));

559

Ok(())

553

Ok(())

560

}

554

}

561

555

562

/// in this test, we pretend that our peer misses exactly (8+10)::

556

/// in this test, we pretend that our peer misses exactly (8+10)::

563

/// and we're comparing all our repo to it (as in a bare push)

557

/// and we're comparing all our repo to it (as in a bare push)

564

#[test]

558

#[test]

565

fn test_discovery() -> Result<(), GraphError> {

559

fn test_discovery() -> Result<(), GraphError> {

566

let mut disco = full_disco();

560

let mut disco = full_disco();

567

disco.add_common_revisions(vec![11, 12])?;

561

disco.add_common_revisions(vec![11, 12])?;

568

disco.add_missing_revisions(vec![8, 10])?;

562

disco.add_missing_revisions(vec![8, 10])?;

569

assert_eq!(sorted_undecided(&disco), vec![5]);

563

assert_eq!(sorted_undecided(&disco), vec![5]);

570

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

564

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

571

assert!(!disco.is_complete());

565

assert!(!disco.is_complete());

572

566

573

disco.add_common_revisions(vec![5])?;

567

disco.add_common_revisions(vec![5])?;

574

assert_eq!(sorted_undecided(&disco), vec![]);

568

assert_eq!(sorted_undecided(&disco), vec![]);

575

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

569

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

576

assert!(disco.is_complete());

570

assert!(disco.is_complete());

577

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

571

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

578

Ok(())

572

Ok(())

579

}

573

}

580

574

581

#[test]

575

#[test]

582

fn test_add_missing_early_continue() -> Result<(), GraphError> {

576

fn test_add_missing_early_continue() -> Result<(), GraphError> {

583

eprintln!("test_add_missing_early_stop");

577

eprintln!("test_add_missing_early_stop");

584

let mut disco = full_disco();

578

let mut disco = full_disco();

585

disco.add_common_revisions(vec![13, 3, 4])?;

579

disco.add_common_revisions(vec![13, 3, 4])?;

586

disco.ensure_children_cache()?;

580

disco.ensure_children_cache()?;

587

// 12 is grand-child of 6 through 9

581

// 12 is grand-child of 6 through 9

588

// passing them in this order maximizes the chances of the

582

// passing them in this order maximizes the chances of the

589

// early continue to do the wrong thing

583

// early continue to do the wrong thing

590

disco.add_missing_revisions(vec![6, 9, 12])?;

584

disco.add_missing_revisions(vec![6, 9, 12])?;

591

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

585

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

592

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

586

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

593

assert!(!disco.is_complete());

587

assert!(!disco.is_complete());

594

Ok(())

588

Ok(())

595

}

589

}

596

590

597

#[test]

591

#[test]

598

fn test_limit_sample_no_need_to() {

592

fn test_limit_sample_no_need_to() {

599

let sample = vec![1, 2, 3, 4];

593

let sample = vec![1, 2, 3, 4];

600

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

594

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

601

}

595

}

602

596

603

#[test]

597

#[test]

604

fn test_limit_sample_less_than_half() {

598

fn test_limit_sample_less_than_half() {

605

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

599

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

606

}

600

}

607

601

608

#[test]

602

#[test]

609

fn test_limit_sample_more_than_half() {

603

fn test_limit_sample_more_than_half() {

610

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

604

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

611

}

605

}

612

606

613

#[test]

607

#[test]

614

fn test_limit_sample_no_random() {

608

fn test_limit_sample_no_random() {

615

let mut disco = full_disco();

609

let mut disco = full_disco();

616

disco.randomize = false;

610

disco.randomize = false;

617

assert_eq!(

611

assert_eq!(

618

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

612

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

619

vec![1, 3, 5, 7]

613

vec![1, 3, 5, 7]

620

);

614

);

621

}

615

}

622

616

623

#[test]

617

#[test]

624

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

618

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

625

let mut disco = full_disco();

619

let mut disco = full_disco();

626

disco.undecided = Some((1..=13).collect());

620

disco.undecided = Some((1..=13).collect());

627

621

628

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

622

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

629

sample_vec.sort();

623

sample_vec.sort();

630

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

624

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

631

Ok(())

625

Ok(())

632

}

626

}

633

627

634

#[test]

628

#[test]

635

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

629

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

636

let mut disco = disco12();

630

let mut disco = disco12();

637

disco.ensure_undecided()?;

631

disco.ensure_undecided()?;

638

632

639

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

633

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

640

sample_vec.sort();

634

sample_vec.sort();

641

// r12's only parent is r9, whose unique grand-parent through the

635

// r12's only parent is r9, whose unique grand-parent through the

642

// diamond shape is r4. This ends there because the distance from r4

636

// diamond shape is r4. This ends there because the distance from r4

643

// to the root is only 3.

637

// to the root is only 3.

644

assert_eq!(sample_vec, vec![4, 9, 12]);

638

assert_eq!(sample_vec, vec![4, 9, 12]);

645

Ok(())

639

Ok(())

646

}

640

}

647

641

648

#[test]

642

#[test]

649

fn test_children_cache() -> Result<(), GraphError> {

643

fn test_children_cache() -> Result<(), GraphError> {

650

let mut disco = full_disco();

644

let mut disco = full_disco();

651

disco.ensure_children_cache()?;

645

disco.ensure_children_cache()?;

652

646

653

let cache = disco.children_cache.unwrap();

647

let cache = disco.children_cache.unwrap();

654

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

648

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

655

assert_eq!(cache.get(&10).cloned(), None);

649

assert_eq!(cache.get(&10).cloned(), None);

656

650

657

let mut children_4 = cache.get(&4).cloned().unwrap();

651

let mut children_4 = cache.get(&4).cloned().unwrap();

658

children_4.sort();

652

children_4.sort();

659

assert_eq!(children_4, vec![5, 6, 7]);

653

assert_eq!(children_4, vec![5, 6, 7]);

660

654

661

let mut children_7 = cache.get(&7).cloned().unwrap();

655

let mut children_7 = cache.get(&7).cloned().unwrap();

662

children_7.sort();

656

children_7.sort();

663

assert_eq!(children_7, vec![9, 11]);

657

assert_eq!(children_7, vec![9, 11]);

664

658

665

Ok(())

659

Ok(())

666

}

660

}

667

661

668

#[test]

662

#[test]

669

fn test_complete_sample() {

663

fn test_complete_sample() {

670

let mut disco = full_disco();

664

let mut disco = full_disco();

671

let undecided: HashSet<Revision> =

665

let undecided: HashSet<Revision> =

672

[4, 7, 9, 2, 3].iter().cloned().collect();

666

[4, 7, 9, 2, 3].iter().cloned().collect();

673

disco.undecided = Some(undecided);

667

disco.undecided = Some(undecided);

674

668

675

let mut sample = vec![0];

669

let mut sample = vec![0];

676

disco.random_complete_sample(&mut sample, 3);

670

disco.random_complete_sample(&mut sample, 3);

677

assert_eq!(sample.len(), 3);

671

assert_eq!(sample.len(), 3);

678

672

679

let mut sample = vec![2, 4, 7];

673

let mut sample = vec![2, 4, 7];

680

disco.random_complete_sample(&mut sample, 1);

674

disco.random_complete_sample(&mut sample, 1);

681

assert_eq!(sample.len(), 3);

675

assert_eq!(sample.len(), 3);

682

}

676

}

683

677

684

#[test]

678

#[test]

685

fn test_bidirectional_sample() -> Result<(), GraphError> {

679

fn test_bidirectional_sample() -> Result<(), GraphError> {

686

let mut disco = full_disco();

680

let mut disco = full_disco();

687

disco.undecided = Some((0..=13).into_iter().collect());

681

disco.undecided = Some((0..=13).into_iter().collect());

688

682

689

let (sample_set, size) = disco.bidirectional_sample(7)?;

683

let (sample_set, size) = disco.bidirectional_sample(7)?;

690

assert_eq!(size, 7);

684

assert_eq!(size, 7);

691

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

685

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

692

sample.sort();

686

sample.sort();

693

// our DAG is a bit too small for the results to be really interesting

687

// our DAG is a bit too small for the results to be really interesting

694

// at least it shows that

688

// at least it shows that

695

// - we went both ways

689

// - we went both ways

696

// - we didn't take all Revisions (6 is not in the sample)

690

// - we didn't take all Revisions (6 is not in the sample)

697

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

691

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

698

Ok(())

692

Ok(())

699

}

693

}

700

694

701

}

695

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             // discovery.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Discovery operations
             //!
             //! This is a Rust counterpart to the `partialdiscovery` class of
             //! `mercurial.setdiscovery`
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::ancestors::MissingAncestors;
             use crate::dagops;
             use rand::seq::SliceRandom;
             use rand::{thread_rng, RngCore, SeedableRng};
             use std::cmp::{max, min};
             use std::collections::{HashMap, HashSet, VecDeque};
             type Rng = rand_pcg::Pcg32;
             pub struct PartialDiscovery<G: Graph + Clone> {
                 target_heads: Option<Vec<Revision>>,
                 graph: G, // plays the role of self._repo
                 common: MissingAncestors<G>,
                 undecided: Option<HashSet<Revision>>,
                 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
                 missing: HashSet<Revision>,
                 rng: Rng,
                 respect_size: bool,
                 randomize: bool,
             }
             pub struct DiscoveryStats {
                 pub undecided: Option<usize>,
             }
             /// Update an existing sample to match the expected size
             ///
             /// The sample is updated with revisions exponentially distant from each
             /// element of `heads`.
             ///
             /// If a target size is specified, the sampling will stop once this size is
             /// reached. Otherwise sampling will happen until roots of the <revs> set are
             /// reached.
             ///
             /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
             ///   represented by `parentfn`
             /// - `heads`: set of DAG head revs
             /// - `sample`: a sample to update
             /// - `parentfn`: a callable to resolve parents for a revision
             /// - `quicksamplesize`: optional target size of the sample
             fn update_sample<I>(
                 revs: Option<&HashSet<Revision>>,
                 heads: impl IntoIterator<Item = Revision>,
                 sample: &mut HashSet<Revision>,
                 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
                 quicksamplesize: Option<usize>,
             ) -> Result<(), GraphError>
             where
                 I: Iterator<Item = Revision>,
             {
                 let mut distances: HashMap<Revision, u32> = HashMap::new();
                 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
                 let mut factor: u32 = 1;
                 let mut seen: HashSet<Revision> = HashSet::new();
-                loop {
+                while let Some(current) = visit.pop_front() {
-                    let current = match visit.pop_front() {
-                        None => {
-                            break;
-                        Some(r) => r,
-                    };
                     if !seen.insert(current) {
                         continue;
                     }
                     let d = *distances.entry(current).or_insert(1);
                     if d > factor {
                         factor *= 2;
                     }
                     if d == factor {
                         sample.insert(current);
                         if let Some(sz) = quicksamplesize {
                             if sample.len() >= sz {
                                 return Ok(());
                             }
                         }
                     }
                     for p in parentsfn(current)? {
                         if let Some(revs) = revs {
                             if !revs.contains(&p) {
                                 continue;
                             }
                         }
                         distances.entry(p).or_insert(d + 1);
                         visit.push_back(p);
                     }
                 }
                 Ok(())
             }
             struct ParentsIterator {
                 parents: [Revision; 2],
                 cur: usize,
             }
             impl ParentsIterator {
                 fn graph_parents(
                     graph: &impl Graph,
                     r: Revision,
                 ) -> Result<ParentsIterator, GraphError> {
                     Ok(ParentsIterator {
                         parents: graph.parents(r)?,
                         cur: 0,
                     })
                 }
             }
             impl Iterator for ParentsIterator {
                 type Item = Revision;
                 fn next(&mut self) -> Option<Revision> {
                     if self.cur > 1 {
                         return None;
                     }
                     let rev = self.parents[self.cur];
                     self.cur += 1;
                     if rev == NULL_REVISION {
                         return self.next();
                     }
                     Some(rev)
                 }
             }
             impl<G: Graph + Clone> PartialDiscovery<G> {
                 /// Create a PartialDiscovery object, with the intent
                 /// of comparing our `::<target_heads>` revset to the contents of another
                 /// repo.
                 ///
                 /// For now `target_heads` is passed as a vector, and will be used
                 /// at the first call to `ensure_undecided()`.
                 ///
                 /// If we want to make the signature more flexible,
                 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
                 /// object since we'll keep it in the meanwhile
                 ///
                 /// The `respect_size` boolean controls how the sampling methods
                 /// will interpret the size argument requested by the caller. If it's
                 /// `false`, they are allowed to produce a sample whose size is more
                 /// appropriate to the situation (typically bigger).
                 ///
                 /// The `randomize` boolean affects sampling, and specifically how
                 /// limiting or last-minute expanding is been done:
                 ///
                 /// If `true`, both will perform random picking from `self.undecided`.
                 /// This is currently the best for actual discoveries.
                 ///
                 /// If `false`, a reproductible picking strategy is performed. This is
                 /// useful for integration tests.
                 pub fn new(
                     graph: G,
                     target_heads: Vec<Revision>,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     let mut seed: [u8; 16] = [0; 16];
                     if randomize {
                         thread_rng().fill_bytes(&mut seed);
                     }
                     Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
                 }
                 pub fn new_with_seed(
                     graph: G,
                     target_heads: Vec<Revision>,
                     seed: [u8; 16],
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     PartialDiscovery {
                         undecided: None,
                         children_cache: None,
                         target_heads: Some(target_heads),
                         graph: graph.clone(),
                         common: MissingAncestors::new(graph, vec![]),
                         missing: HashSet::new(),
                         rng: Rng::from_seed(seed),
                         respect_size: respect_size,
                         randomize: randomize,
                     }
                 }
                 /// Extract at most `size` random elements from sample and return them
                 /// as a vector
                 fn limit_sample(
                     &mut self,
                     mut sample: Vec<Revision>,
                     size: usize,
                 ) -> Vec<Revision> {
                     if !self.randomize {
                         sample.sort();
                         sample.truncate(size);
                         return sample;
                     }
                     let sample_len = sample.len();
                     if sample_len <= size {
                         return sample;
                     }
                     let rng = &mut self.rng;
                     let dropped_size = sample_len - size;
                     let limited_slice = if size < dropped_size {
                         sample.partial_shuffle(rng, size).0
                     } else {
                         sample.partial_shuffle(rng, dropped_size).1
                     };
                     limited_slice.to_owned()
                 }
                 /// Register revisions known as being common
                 pub fn add_common_revisions(
                     &mut self,
                     common: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let before_len = self.common.get_bases().len();
                     self.common.add_bases(common);
                     if self.common.get_bases().len() == before_len {
                         return Ok(());
                     }
                     if let Some(ref mut undecided) = self.undecided {
                         self.common.remove_ancestors_from(undecided)?;
                     }
                     Ok(())
                 }
                 /// Register revisions known as being missing
                 ///
                 /// # Performance note
                 ///
                 /// Except in the most trivial case, the first call of this method has
                 /// the side effect of computing `self.undecided` set for the first time,
                 /// and the related caches it might need for efficiency of its internal
                 /// computation. This is typically faster if more information is
                 /// available in `self.common`. Therefore, for good performance, the
                 /// caller should avoid calling this too early.
                 pub fn add_missing_revisions(
                     &mut self,
                     missing: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
                     if tovisit.is_empty() {
                         return Ok(());
                     }
                     self.ensure_children_cache()?;
                     self.ensure_undecided()?; // for safety of possible future refactors
                     let children = self.children_cache.as_ref().unwrap();
                     let mut seen: HashSet<Revision> = HashSet::new();
                     let undecided_mut = self.undecided.as_mut().unwrap();
                     while let Some(rev) = tovisit.pop_front() {
                         if !self.missing.insert(rev) {
                             // either it's known to be missing from a previous
                             // invocation, and there's no need to iterate on its
                             // children (we now they are all missing)
                             // or it's from a previous iteration of this loop
                             // and its children have already been queued
                             continue;
                         }
                         undecided_mut.remove(&rev);
                         match children.get(&rev) {
                             None => {
                                 continue;
                             }
                             Some(this_children) => {
                                 for child in this_children.iter().cloned() {
                                     if seen.insert(child) {
                                         tovisit.push_back(child);
                                     }
                                 }
                             }
                         }
                     }
                     Ok(())
                 }
                 /// Do we have any information about the peer?
                 pub fn has_info(&self) -> bool {
                     self.common.has_bases()
                 }
                 /// Did we acquire full knowledge of our Revisions that the peer has?
                 pub fn is_complete(&self) -> bool {
                     self.undecided.as_ref().map_or(false, |s| s.is_empty())
                 }
                 /// Return the heads of the currently known common set of revisions.
                 ///
                 /// If the discovery process is not complete (see `is_complete()`), the
                 /// caller must be aware that this is an intermediate state.
                 ///
                 /// On the other hand, if it is complete, then this is currently
                 /// the only way to retrieve the end results of the discovery process.
                 ///
                 /// We may introduce in the future an `into_common_heads` call that
                 /// would be more appropriate for normal Rust callers, dropping `self`
                 /// if it is complete.
                 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     self.common.bases_heads()
                 }
                 /// Force first computation of `self.undecided`
                 ///
                 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
                 /// unwrapped to get workable immutable or mutable references without
                 /// any panic.
                 ///
                 /// This is an imperative call instead of an access with added lazyness
                 /// to reduce easily the scope of mutable borrow for the caller,
                 /// compared to undecided(&'a mut self) -> &'a… that would keep it
                 /// as long as the resulting immutable one.
                 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
                     if self.undecided.is_some() {
                         return Ok(());
                     }
                     let tgt = self.target_heads.take().unwrap();
                     self.undecided =
                         Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
                     Ok(())
                 }
                 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
                     if self.children_cache.is_some() {
                         return Ok(());
                     }
                     self.ensure_undecided()?;
                     let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
                     for &rev in self.undecided.as_ref().unwrap() {
                         for p in ParentsIterator::graph_parents(&self.graph, rev)? {
                             children.entry(p).or_insert_with(|| Vec::new()).push(rev);
                         }
                     }
                     self.children_cache = Some(children);
                     Ok(())
                 }
                 /// Provide statistics about the current state of the discovery process
                 pub fn stats(&self) -> DiscoveryStats {
                     DiscoveryStats {
                         undecided: self.undecided.as_ref().map(|s| s.len()),
                     }
                 }
                 pub fn take_quick_sample(
                     &mut self,
                     headrevs: impl IntoIterator<Item = Revision>,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     self.ensure_undecided()?;
                     let mut sample = {
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok(undecided.iter().cloned().collect());
                         }
                         dagops::heads(&self.graph, undecided.iter())?
                     };
                     if sample.len() >= size {
                         return Ok(self.limit_sample(sample.into_iter().collect(), size));
                     }
                     update_sample(
                         None,
                         headrevs,
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         Some(size),
                     )?;
                     Ok(sample.into_iter().collect())
                 }
                 /// Extract a sample from `self.undecided`, going from its heads and roots.
                 ///
                 /// The `size` parameter is used to avoid useless computations if
                 /// it turns out to be bigger than the whole set of undecided Revisions.
                 ///
                 /// The sample is taken by using `update_sample` from the heads, then
                 /// from the roots, working on the reverse DAG,
                 /// expressed by `self.children_cache`.
                 ///
                 /// No effort is being made to complete or limit the sample to `size`
                 /// but this method returns another interesting size that it derives
                 /// from its knowledge of the structure of the various sets, leaving
                 /// to the caller the decision to use it or not.
                 fn bidirectional_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<(HashSet<Revision>, usize), GraphError> {
                     self.ensure_undecided()?;
                     {
                         // we don't want to compute children_cache before this
                         // but doing it after extracting self.undecided takes a mutable
                         // ref to self while a shareable one is still active.
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok((undecided.clone(), size));
                         }
                     }
                     self.ensure_children_cache()?;
                     let revs = self.undecided.as_ref().unwrap();
                     let mut sample: HashSet<Revision> = revs.clone();
                     // it's possible that leveraging the children cache would be more
                     // efficient here
                     dagops::retain_heads(&self.graph, &mut sample)?;
                     let revsheads = sample.clone(); // was again heads(revs) in python
                     // update from heads
                     update_sample(
                         Some(revs),
                         revsheads.iter().cloned(),
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         None,
                     )?;
                     // update from roots
                     let revroots: HashSet<Revision> =
                         dagops::roots(&self.graph, revs)?.into_iter().collect();
                     let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
                     let children = self.children_cache.as_ref().unwrap();
                     let empty_vec: Vec<Revision> = Vec::new();
                     update_sample(
                         Some(revs),
                         revroots,
                         &mut sample,
                         |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
                         None,
                     )?;
                     Ok((sample, prescribed_size))
                 }
                 /// Fill up sample up to the wished size with random undecided Revisions.
                 ///
                 /// This is intended to be used as a last resort completion if the
                 /// regular sampling algorithm returns too few elements.
                 fn random_complete_sample(
                     &mut self,
                     sample: &mut Vec<Revision>,
                     size: usize,
                 ) {
                     let sample_len = sample.len();
                     if size <= sample_len {
                         return;
                     }
                     let take_from: Vec<Revision> = self
                         .undecided
                         .as_ref()
                         .unwrap()
                         .iter()
                         .filter(|&r| !sample.contains(r))
                         .cloned()
                         .collect();
                     sample.extend(self.limit_sample(take_from, size - sample_len));
                 }
                 pub fn take_full_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
                     let size = if self.respect_size {
                         size
                     } else {
                         prescribed_size
                     };
                     let mut sample =
                         self.limit_sample(sample_set.into_iter().collect(), size);
                     self.random_complete_sample(&mut sample, size);
                     Ok(sample)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;
                 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
                 ///
                 /// To avoid actual randomness in these tests, we give it a fixed
                 /// random seed, but by default we'll test the random version.
                 fn full_disco() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![10, 11, 12, 13],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
                 ///
                 /// To avoid actual randomness in tests, we give it a fixed random seed.
                 fn disco12() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![12],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 fn sorted_undecided(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.undecided.as_ref().unwrap().iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.missing.iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_common_heads(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut as_vec: Vec<Revision> =
                         disco.common_heads()?.iter().cloned().collect();
                     as_vec.sort();
                     Ok(as_vec)
                 }
                 #[test]
                 fn test_add_common_get_undecided() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     assert_eq!(disco.undecided, None);
                     assert!(!disco.has_info());
                     assert_eq!(disco.stats().undecided, None);
                     disco.add_common_revisions(vec![11, 12])?;
                     assert!(disco.has_info());
                     assert!(!disco.is_complete());
                     assert!(disco.missing.is_empty());
                     // add_common_revisions did not trigger a premature computation
                     // of `undecided`, let's check that and ask for them
                     assert_eq!(disco.undecided, None);
                     disco.ensure_undecided()?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
                     assert_eq!(disco.stats().undecided, Some(4));
                     Ok(())
                 }
                 /// in this test, we pretend that our peer misses exactly (8+10)::
                 /// and we're comparing all our repo to it (as in a bare push)
                 #[test]
                 fn test_discovery() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![11, 12])?;
                     disco.add_missing_revisions(vec![8, 10])?;
                     assert_eq!(sorted_undecided(&disco), vec![5]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(!disco.is_complete());
                     disco.add_common_revisions(vec![5])?;
                     assert_eq!(sorted_undecided(&disco), vec![]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(disco.is_complete());
                     assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_add_missing_early_continue() -> Result<(), GraphError> {
                     eprintln!("test_add_missing_early_stop");
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![13, 3, 4])?;
                     disco.ensure_children_cache()?;
                     // 12 is grand-child of 6 through 9
                     // passing them in this order maximizes the chances of the
                     // early continue to do the wrong thing
                     disco.add_missing_revisions(vec![6, 9, 12])?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
                     assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
                     assert!(!disco.is_complete());
                     Ok(())
                 }
                 #[test]
                 fn test_limit_sample_no_need_to() {
                     let sample = vec![1, 2, 3, 4];
                     assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
                 }
                 #[test]
                 fn test_limit_sample_less_than_half() {
                     assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
                 }
                 #[test]
                 fn test_limit_sample_more_than_half() {
                     assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
                 }
                 #[test]
                 fn test_limit_sample_no_random() {
                     let mut disco = full_disco();
                     disco.randomize = false;
                     assert_eq!(
                         disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
                         vec![1, 3, 5, 7]
                     );
                 }
                 #[test]
                 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((1..=13).collect());
                     let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
                     sample_vec.sort();
                     assert_eq!(sample_vec, vec![10, 11, 12, 13]);
                     Ok(())
                 }
                 #[test]
                 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
                     let mut disco = disco12();
                     disco.ensure_undecided()?;
                     let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
                     sample_vec.sort();
                     // r12's only parent is r9, whose unique grand-parent through the
                     // diamond shape is r4. This ends there because the distance from r4
                     // to the root is only 3.
                     assert_eq!(sample_vec, vec![4, 9, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_children_cache() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.ensure_children_cache()?;
                     let cache = disco.children_cache.unwrap();
                     assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
                     assert_eq!(cache.get(&10).cloned(), None);
                     let mut children_4 = cache.get(&4).cloned().unwrap();
                     children_4.sort();
                     assert_eq!(children_4, vec![5, 6, 7]);
                     let mut children_7 = cache.get(&7).cloned().unwrap();
                     children_7.sort();
                     assert_eq!(children_7, vec![9, 11]);
                     Ok(())
                 }
                 #[test]
                 fn test_complete_sample() {
                     let mut disco = full_disco();
                     let undecided: HashSet<Revision> =
                         [4, 7, 9, 2, 3].iter().cloned().collect();
                     disco.undecided = Some(undecided);
                     let mut sample = vec![0];
                     disco.random_complete_sample(&mut sample, 3);
                     assert_eq!(sample.len(), 3);
                     let mut sample = vec![2, 4, 7];
                     disco.random_complete_sample(&mut sample, 1);
                     assert_eq!(sample.len(), 3);
                 }
                 #[test]
                 fn test_bidirectional_sample() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((0..=13).into_iter().collect());
                     let (sample_set, size) = disco.bidirectional_sample(7)?;
                     assert_eq!(size, 7);
                     let mut sample: Vec<Revision> = sample_set.into_iter().collect();
                     sample.sort();
                     // our DAG is a bit too small for the results to be really interesting
                     // at least it shows that
                     // - we went both ways
                     // - we didn't take all Revisions (6 is not in the sample)
                     assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
                     Ok(())
                 }
             }