upstream/mercurial-mirror Commit - r43031:04c3b76f

1

// discovery.rs

1

// discovery.rs

2

//

2

//

3

4

//

4

//

5

// This software may be used and distributed according to the terms of the

5

// This software may be used and distributed according to the terms of the

6

// GNU General Public License version 2 or any later version.

6

// GNU General Public License version 2 or any later version.

7

8

//! Discovery operations

8

//! Discovery operations

9

//!

9

//!

10

//! This is a Rust counterpart to the `partialdiscovery` class of

10

//! This is a Rust counterpart to the `partialdiscovery` class of

11

//! `mercurial.setdiscovery`

11

//! `mercurial.setdiscovery`

12

13

extern crate rand;

14

extern crate rand_pcg;

15

use self::rand::seq::SliceRandom;

16

use self::rand::{thread_rng, RngCore, SeedableRng};

17

use super::{Graph, GraphError, Revision, NULL_REVISION};

13

use super::{Graph, GraphError, Revision, NULL_REVISION};

18

use crate::ancestors::MissingAncestors;

14

use crate::ancestors::MissingAncestors;

19

use crate::dagops;

15

use crate::dagops;

16

use rand::seq::SliceRandom;

17

use rand::{thread_rng, RngCore, SeedableRng};

20

use std::cmp::{max, min};

18

use std::cmp::{max, min};

21

use std::collections::{HashMap, HashSet, VecDeque};

19

use std::collections::{HashMap, HashSet, VecDeque};

22

20

23

type Rng = ~~self~~::rand_pcg::Pcg32;

21

type Rng = rand_pcg::Pcg32;

24

22

25

pub struct PartialDiscovery<G: Graph + Clone> {

23

pub struct PartialDiscovery<G: Graph + Clone> {

26

target_heads: Option<Vec<Revision>>,

24

target_heads: Option<Vec<Revision>>,

27

graph: G, // plays the role of self._repo

25

graph: G, // plays the role of self._repo

28

common: MissingAncestors<G>,

26

common: MissingAncestors<G>,

29

undecided: Option<HashSet<Revision>>,

27

undecided: Option<HashSet<Revision>>,

30

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

28

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

31

missing: HashSet<Revision>,

29

missing: HashSet<Revision>,

32

rng: Rng,

30

rng: Rng,

33

respect_size: bool,

31

respect_size: bool,

34

randomize: bool,

32

randomize: bool,

35

}

33

}

36

34

37

pub struct DiscoveryStats {

35

pub struct DiscoveryStats {

38

pub undecided: Option<usize>,

36

pub undecided: Option<usize>,

39

}

37

}

40

38

41

/// Update an existing sample to match the expected size

39

/// Update an existing sample to match the expected size

42

///

40

///

43

/// The sample is updated with revisions exponentially distant from each

41

/// The sample is updated with revisions exponentially distant from each

44

/// element of `heads`.

42

/// element of `heads`.

45

///

43

///

46

/// If a target size is specified, the sampling will stop once this size is

44

/// If a target size is specified, the sampling will stop once this size is

47

/// reached. Otherwise sampling will happen until roots of the <revs> set are

45

/// reached. Otherwise sampling will happen until roots of the <revs> set are

48

/// reached.

46

/// reached.

49

///

47

///

50

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

48

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

51

/// represented by `parentfn`

49

/// represented by `parentfn`

52

/// - `heads`: set of DAG head revs

50

/// - `heads`: set of DAG head revs

53

/// - `sample`: a sample to update

51

/// - `sample`: a sample to update

54

/// - `parentfn`: a callable to resolve parents for a revision

52

/// - `parentfn`: a callable to resolve parents for a revision

55

/// - `quicksamplesize`: optional target size of the sample

53

/// - `quicksamplesize`: optional target size of the sample

56

fn update_sample<I>(

54

fn update_sample<I>(

57

revs: Option<&HashSet<Revision>>,

55

revs: Option<&HashSet<Revision>>,

58

heads: impl IntoIterator<Item = Revision>,

56

heads: impl IntoIterator<Item = Revision>,

59

sample: &mut HashSet<Revision>,

57

sample: &mut HashSet<Revision>,

60

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

58

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

61

quicksamplesize: Option<usize>,

59

quicksamplesize: Option<usize>,

62

) -> Result<(), GraphError>

60

) -> Result<(), GraphError>

63

where

61

where

64

I: Iterator<Item = Revision>,

62

I: Iterator<Item = Revision>,

65

{

63

{

66

let mut distances: HashMap<Revision, u32> = HashMap::new();

64

let mut distances: HashMap<Revision, u32> = HashMap::new();

67

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

65

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

68

let mut factor: u32 = 1;

66

let mut factor: u32 = 1;

69

let mut seen: HashSet<Revision> = HashSet::new();

67

let mut seen: HashSet<Revision> = HashSet::new();

70

loop {

68

loop {

71

let current = match visit.pop_front() {

69

let current = match visit.pop_front() {

72

None => {

70

None => {

73

break;

71

break;

74

}

72

}

75

Some(r) => r,

73

Some(r) => r,

76

};

74

};

77

if !seen.insert(current) {

75

if !seen.insert(current) {

78

continue;

76

continue;

79

}

77

}

80

78

81

let d = *distances.entry(current).or_insert(1);

79

let d = *distances.entry(current).or_insert(1);

82

if d > factor {

80

if d > factor {

83

factor *= 2;

81

factor *= 2;

84

}

82

}

85

if d == factor {

83

if d == factor {

86

sample.insert(current);

84

sample.insert(current);

87

if let Some(sz) = quicksamplesize {

85

if let Some(sz) = quicksamplesize {

88

if sample.len() >= sz {

86

if sample.len() >= sz {

89

return Ok(());

87

return Ok(());

90

}

88

}

91

}

89

}

92

}

90

}

93

for p in parentsfn(current)? {

91

for p in parentsfn(current)? {

94

if let Some(revs) = revs {

92

if let Some(revs) = revs {

95

if !revs.contains(&p) {

93

if !revs.contains(&p) {

96

continue;

94

continue;

97

}

95

}

98

}

96

}

99

distances.entry(p).or_insert(d + 1);

97

distances.entry(p).or_insert(d + 1);

100

visit.push_back(p);

98

visit.push_back(p);

101

}

99

}

102

}

100

}

103

Ok(())

101

Ok(())

104

}

102

}

105

103

106

struct ParentsIterator {

104

struct ParentsIterator {

107

parents: [Revision; 2],

105

parents: [Revision; 2],

108

cur: usize,

106

cur: usize,

109

}

107

}

110

108

111

impl ParentsIterator {

109

impl ParentsIterator {

112

fn graph_parents(

110

fn graph_parents(

113

graph: &impl Graph,

111

graph: &impl Graph,

114

r: Revision,

112

r: Revision,

115

) -> Result<ParentsIterator, GraphError> {

113

) -> Result<ParentsIterator, GraphError> {

116

Ok(ParentsIterator {

114

Ok(ParentsIterator {

117

parents: graph.parents(r)?,

115

parents: graph.parents(r)?,

118

cur: 0,

116

cur: 0,

119

})

117

})

120

}

118

}

121

}

119

}

122

120

123

impl Iterator for ParentsIterator {

121

impl Iterator for ParentsIterator {

124

type Item = Revision;

122

type Item = Revision;

125

123

126

fn next(&mut self) -> Option<Revision> {

124

fn next(&mut self) -> Option<Revision> {

127

if self.cur > 1 {

125

if self.cur > 1 {

128

return None;

126

return None;

129

}

127

}

130

let rev = self.parents[self.cur];

128

let rev = self.parents[self.cur];

131

self.cur += 1;

129

self.cur += 1;

132

if rev == NULL_REVISION {

130

if rev == NULL_REVISION {

133

return self.next();

131

return self.next();

134

}

132

}

135

Some(rev)

133

Some(rev)

136

}

134

}

137

}

135

}

138

136

139

impl<G: Graph + Clone> PartialDiscovery<G> {

137

impl<G: Graph + Clone> PartialDiscovery<G> {

140

/// Create a PartialDiscovery object, with the intent

138

/// Create a PartialDiscovery object, with the intent

141

/// of comparing our `::<target_heads>` revset to the contents of another

139

/// of comparing our `::<target_heads>` revset to the contents of another

142

/// repo.

140

/// repo.

143

///

141

///

144

/// For now `target_heads` is passed as a vector, and will be used

142

/// For now `target_heads` is passed as a vector, and will be used

145

/// at the first call to `ensure_undecided()`.

143

/// at the first call to `ensure_undecided()`.

146

///

144

///

147

/// If we want to make the signature more flexible,

145

/// If we want to make the signature more flexible,

148

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

146

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

149

/// object since we'll keep it in the meanwhile

147

/// object since we'll keep it in the meanwhile

150

///

148

///

151

/// The `respect_size` boolean controls how the sampling methods

149

/// The `respect_size` boolean controls how the sampling methods

152

/// will interpret the size argument requested by the caller. If it's

150

/// will interpret the size argument requested by the caller. If it's

153

/// `false`, they are allowed to produce a sample whose size is more

151

/// `false`, they are allowed to produce a sample whose size is more

154

/// appropriate to the situation (typically bigger).

152

/// appropriate to the situation (typically bigger).

155

///

153

///

156

/// The `randomize` boolean affects sampling, and specifically how

154

/// The `randomize` boolean affects sampling, and specifically how

157

/// limiting or last-minute expanding is been done:

155

/// limiting or last-minute expanding is been done:

158

///

156

///

159

/// If `true`, both will perform random picking from `self.undecided`.

157

/// If `true`, both will perform random picking from `self.undecided`.

160

/// This is currently the best for actual discoveries.

158

/// This is currently the best for actual discoveries.

161

///

159

///

162

/// If `false`, a reproductible picking strategy is performed. This is

160

/// If `false`, a reproductible picking strategy is performed. This is

163

/// useful for integration tests.

161

/// useful for integration tests.

164

pub fn new(

162

pub fn new(

165

graph: G,

163

graph: G,

166

target_heads: Vec<Revision>,

164

target_heads: Vec<Revision>,

167

respect_size: bool,

165

respect_size: bool,

168

randomize: bool,

166

randomize: bool,

169

) -> Self {

167

) -> Self {

170

let mut seed: [u8; 16] = [0; 16];

168

let mut seed: [u8; 16] = [0; 16];

171

if randomize {

169

if randomize {

172

thread_rng().fill_bytes(&mut seed);

170

thread_rng().fill_bytes(&mut seed);

173

}

171

}

174

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

172

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

175

}

173

}

176

174

177

pub fn new_with_seed(

175

pub fn new_with_seed(

178

graph: G,

176

graph: G,

179

target_heads: Vec<Revision>,

177

target_heads: Vec<Revision>,

180

seed: [u8; 16],

178

seed: [u8; 16],

181

respect_size: bool,

179

respect_size: bool,

182

randomize: bool,

180

randomize: bool,

183

) -> Self {

181

) -> Self {

184

PartialDiscovery {

182

PartialDiscovery {

185

undecided: None,

183

undecided: None,

186

children_cache: None,

184

children_cache: None,

187

target_heads: Some(target_heads),

185

target_heads: Some(target_heads),

188

graph: graph.clone(),

186

graph: graph.clone(),

189

common: MissingAncestors::new(graph, vec![]),

187

common: MissingAncestors::new(graph, vec![]),

190

missing: HashSet::new(),

188

missing: HashSet::new(),

191

rng: Rng::from_seed(seed),

189

rng: Rng::from_seed(seed),

192

respect_size: respect_size,

190

respect_size: respect_size,

193

randomize: randomize,

191

randomize: randomize,

194

}

192

}

195

}

193

}

196

194

197

/// Extract at most `size` random elements from sample and return them

195

/// Extract at most `size` random elements from sample and return them

198

/// as a vector

196

/// as a vector

199

fn limit_sample(

197

fn limit_sample(

200

&mut self,

198

&mut self,

201

mut sample: Vec<Revision>,

199

mut sample: Vec<Revision>,

202

size: usize,

200

size: usize,

203

) -> Vec<Revision> {

201

) -> Vec<Revision> {

204

if !self.randomize {

202

if !self.randomize {

205

sample.sort();

203

sample.sort();

206

sample.truncate(size);

204

sample.truncate(size);

207

return sample;

205

return sample;

208

}

206

}

209

let sample_len = sample.len();

207

let sample_len = sample.len();

210

if sample_len <= size {

208

if sample_len <= size {

211

return sample;

209

return sample;

212

}

210

}

213

let rng = &mut self.rng;

211

let rng = &mut self.rng;

214

let dropped_size = sample_len - size;

212

let dropped_size = sample_len - size;

215

let limited_slice = if size < dropped_size {

213

let limited_slice = if size < dropped_size {

216

sample.partial_shuffle(rng, size).0

214

sample.partial_shuffle(rng, size).0

217

} else {

215

} else {

218

sample.partial_shuffle(rng, dropped_size).1

216

sample.partial_shuffle(rng, dropped_size).1

219

};

217

};

220

limited_slice.to_owned()

218

limited_slice.to_owned()

221

}

219

}

222

220

223

/// Register revisions known as being common

221

/// Register revisions known as being common

224

pub fn add_common_revisions(

222

pub fn add_common_revisions(

225

&mut self,

223

&mut self,

226

common: impl IntoIterator<Item = Revision>,

224

common: impl IntoIterator<Item = Revision>,

227

) -> Result<(), GraphError> {

225

) -> Result<(), GraphError> {

228

let before_len = self.common.get_bases().len();

226

let before_len = self.common.get_bases().len();

229

self.common.add_bases(common);

227

self.common.add_bases(common);

230

if self.common.get_bases().len() == before_len {

228

if self.common.get_bases().len() == before_len {

231

return Ok(());

229

return Ok(());

232

}

230

}

233

if let Some(ref mut undecided) = self.undecided {

231

if let Some(ref mut undecided) = self.undecided {

234

self.common.remove_ancestors_from(undecided)?;

232

self.common.remove_ancestors_from(undecided)?;

235

}

233

}

236

Ok(())

234

Ok(())

237

}

235

}

238

236

239

/// Register revisions known as being missing

237

/// Register revisions known as being missing

240

///

238

///

241

/// # Performance note

239

/// # Performance note

242

///

240

///

243

/// Except in the most trivial case, the first call of this method has

241

/// Except in the most trivial case, the first call of this method has

244

/// the side effect of computing `self.undecided` set for the first time,

242

/// the side effect of computing `self.undecided` set for the first time,

245

/// and the related caches it might need for efficiency of its internal

243

/// and the related caches it might need for efficiency of its internal

246

/// computation. This is typically faster if more information is

244

/// computation. This is typically faster if more information is

247

/// available in `self.common`. Therefore, for good performance, the

245

/// available in `self.common`. Therefore, for good performance, the

248

/// caller should avoid calling this too early.

246

/// caller should avoid calling this too early.

249

pub fn add_missing_revisions(

247

pub fn add_missing_revisions(

250

&mut self,

248

&mut self,

251

missing: impl IntoIterator<Item = Revision>,

249

missing: impl IntoIterator<Item = Revision>,

252

) -> Result<(), GraphError> {

250

) -> Result<(), GraphError> {

253

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

251

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

254

if tovisit.is_empty() {

252

if tovisit.is_empty() {

255

return Ok(());

253

return Ok(());

256

}

254

}

257

self.ensure_children_cache()?;

255

self.ensure_children_cache()?;

258

self.ensure_undecided()?; // for safety of possible future refactors

256

self.ensure_undecided()?; // for safety of possible future refactors

259

let children = self.children_cache.as_ref().unwrap();

257

let children = self.children_cache.as_ref().unwrap();

260

let mut seen: HashSet<Revision> = HashSet::new();

258

let mut seen: HashSet<Revision> = HashSet::new();

261

let undecided_mut = self.undecided.as_mut().unwrap();

259

let undecided_mut = self.undecided.as_mut().unwrap();

262

while let Some(rev) = tovisit.pop_front() {

260

while let Some(rev) = tovisit.pop_front() {

263

if !self.missing.insert(rev) {

261

if !self.missing.insert(rev) {

264

// either it's known to be missing from a previous

262

// either it's known to be missing from a previous

265

// invocation, and there's no need to iterate on its

263

// invocation, and there's no need to iterate on its

266

// children (we now they are all missing)

264

// children (we now they are all missing)

267

// or it's from a previous iteration of this loop

265

// or it's from a previous iteration of this loop

268

// and its children have already been queued

266

// and its children have already been queued

269

continue;

267

continue;

270

}

268

}

271

undecided_mut.remove(&rev);

269

undecided_mut.remove(&rev);

272

match children.get(&rev) {

270

match children.get(&rev) {

273

None => {

271

None => {

274

continue;

272

continue;

275

}

273

}

276

Some(this_children) => {

274

Some(this_children) => {

277

for child in this_children.iter().cloned() {

275

for child in this_children.iter().cloned() {

278

if seen.insert(child) {

276

if seen.insert(child) {

279

tovisit.push_back(child);

277

tovisit.push_back(child);

280

}

278

}

281

}

279

}

282

}

280

}

283

}

281

}

284

}

282

}

285

Ok(())

283

Ok(())

286

}

284

}

287

285

288

/// Do we have any information about the peer?

286

/// Do we have any information about the peer?

289

pub fn has_info(&self) -> bool {

287

pub fn has_info(&self) -> bool {

290

self.common.has_bases()

288

self.common.has_bases()

291

}

289

}

292

290

293

/// Did we acquire full knowledge of our Revisions that the peer has?

291

/// Did we acquire full knowledge of our Revisions that the peer has?

294

pub fn is_complete(&self) -> bool {

292

pub fn is_complete(&self) -> bool {

295

self.undecided.as_ref().map_or(false, |s| s.is_empty())

293

self.undecided.as_ref().map_or(false, |s| s.is_empty())

296

}

294

}

297

295

298

/// Return the heads of the currently known common set of revisions.

296

/// Return the heads of the currently known common set of revisions.

299

///

297

///

300

/// If the discovery process is not complete (see `is_complete()`), the

298

/// If the discovery process is not complete (see `is_complete()`), the

301

/// caller must be aware that this is an intermediate state.

299

/// caller must be aware that this is an intermediate state.

302

///

300

///

303

/// On the other hand, if it is complete, then this is currently

301

/// On the other hand, if it is complete, then this is currently

304

/// the only way to retrieve the end results of the discovery process.

302

/// the only way to retrieve the end results of the discovery process.

305

///

303

///

306

/// We may introduce in the future an `into_common_heads` call that

304

/// We may introduce in the future an `into_common_heads` call that

307

/// would be more appropriate for normal Rust callers, dropping `self`

305

/// would be more appropriate for normal Rust callers, dropping `self`

308

/// if it is complete.

306

/// if it is complete.

309

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

307

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

310

self.common.bases_heads()

308

self.common.bases_heads()

311

}

309

}

312

310

313

/// Force first computation of `self.undecided`

311

/// Force first computation of `self.undecided`

314

///

312

///

315

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

313

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

316

/// unwrapped to get workable immutable or mutable references without

314

/// unwrapped to get workable immutable or mutable references without

317

/// any panic.

315

/// any panic.

318

///

316

///

319

/// This is an imperative call instead of an access with added lazyness

317

/// This is an imperative call instead of an access with added lazyness

320

/// to reduce easily the scope of mutable borrow for the caller,

318

/// to reduce easily the scope of mutable borrow for the caller,

321

/// compared to undecided(&'a mut self) -> &'a… that would keep it

319

/// compared to undecided(&'a mut self) -> &'a… that would keep it

322

/// as long as the resulting immutable one.

320

/// as long as the resulting immutable one.

323

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

321

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

324

if self.undecided.is_some() {

322

if self.undecided.is_some() {

325

return Ok(());

323

return Ok(());

326

}

324

}

327

let tgt = self.target_heads.take().unwrap();

325

let tgt = self.target_heads.take().unwrap();

328

self.undecided =

326

self.undecided =

329

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

327

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

330

Ok(())

328

Ok(())

331

}

329

}

332

330

333

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

331

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

334

if self.children_cache.is_some() {

332

if self.children_cache.is_some() {

335

return Ok(());

333

return Ok(());

336

}

334

}

337

self.ensure_undecided()?;

335

self.ensure_undecided()?;

338

336

339

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

337

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

340

for &rev in self.undecided.as_ref().unwrap() {

338

for &rev in self.undecided.as_ref().unwrap() {

341

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

339

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

342

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

340

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

343

}

341

}

344

}

342

}

345

self.children_cache = Some(children);

343

self.children_cache = Some(children);

346

Ok(())

344

Ok(())

347

}

345

}

348

346

349

/// Provide statistics about the current state of the discovery process

347

/// Provide statistics about the current state of the discovery process

350

pub fn stats(&self) -> DiscoveryStats {

348

pub fn stats(&self) -> DiscoveryStats {

351

DiscoveryStats {

349

DiscoveryStats {

352

undecided: self.undecided.as_ref().map(|s| s.len()),

350

undecided: self.undecided.as_ref().map(|s| s.len()),

353

}

351

}

354

}

352

}

355

353

356

pub fn take_quick_sample(

354

pub fn take_quick_sample(

357

&mut self,

355

&mut self,

358

headrevs: impl IntoIterator<Item = Revision>,

356

headrevs: impl IntoIterator<Item = Revision>,

359

size: usize,

357

size: usize,

360

) -> Result<Vec<Revision>, GraphError> {

358

) -> Result<Vec<Revision>, GraphError> {

361

self.ensure_undecided()?;

359

self.ensure_undecided()?;

362

let mut sample = {

360

let mut sample = {

363

let undecided = self.undecided.as_ref().unwrap();

361

let undecided = self.undecided.as_ref().unwrap();

364

if undecided.len() <= size {

362

if undecided.len() <= size {

365

return Ok(undecided.iter().cloned().collect());

363

return Ok(undecided.iter().cloned().collect());

366

}

364

}

367

dagops::heads(&self.graph, undecided.iter())?

365

dagops::heads(&self.graph, undecided.iter())?

368

};

366

};

369

if sample.len() >= size {

367

if sample.len() >= size {

370

return Ok(self.limit_sample(sample.into_iter().collect(), size));

368

return Ok(self.limit_sample(sample.into_iter().collect(), size));

371

}

369

}

372

update_sample(

370

update_sample(

373

None,

371

None,

374

headrevs,

372

headrevs,

375

&mut sample,

373

&mut sample,

376

|r| ParentsIterator::graph_parents(&self.graph, r),

374

|r| ParentsIterator::graph_parents(&self.graph, r),

377

Some(size),

375

Some(size),

378

)?;

376

)?;

379

Ok(sample.into_iter().collect())

377

Ok(sample.into_iter().collect())

380

}

378

}

381

379

382

/// Extract a sample from `self.undecided`, going from its heads and roots.

380

/// Extract a sample from `self.undecided`, going from its heads and roots.

383

///

381

///

384

/// The `size` parameter is used to avoid useless computations if

382

/// The `size` parameter is used to avoid useless computations if

385

/// it turns out to be bigger than the whole set of undecided Revisions.

383

/// it turns out to be bigger than the whole set of undecided Revisions.

386

///

384

///

387

/// The sample is taken by using `update_sample` from the heads, then

385

/// The sample is taken by using `update_sample` from the heads, then

388

/// from the roots, working on the reverse DAG,

386

/// from the roots, working on the reverse DAG,

389

/// expressed by `self.children_cache`.

387

/// expressed by `self.children_cache`.

390

///

388

///

391

/// No effort is being made to complete or limit the sample to `size`

389

/// No effort is being made to complete or limit the sample to `size`

392

/// but this method returns another interesting size that it derives

390

/// but this method returns another interesting size that it derives

393

/// from its knowledge of the structure of the various sets, leaving

391

/// from its knowledge of the structure of the various sets, leaving

394

/// to the caller the decision to use it or not.

392

/// to the caller the decision to use it or not.

395

fn bidirectional_sample(

393

fn bidirectional_sample(

396

&mut self,

394

&mut self,

397

size: usize,

395

size: usize,

398

) -> Result<(HashSet<Revision>, usize), GraphError> {

396

) -> Result<(HashSet<Revision>, usize), GraphError> {

399

self.ensure_undecided()?;

397

self.ensure_undecided()?;

400

{

398

{

401

// we don't want to compute children_cache before this

399

// we don't want to compute children_cache before this

402

// but doing it after extracting self.undecided takes a mutable

400

// but doing it after extracting self.undecided takes a mutable

403

// ref to self while a shareable one is still active.

401

// ref to self while a shareable one is still active.

404

let undecided = self.undecided.as_ref().unwrap();

402

let undecided = self.undecided.as_ref().unwrap();

405

if undecided.len() <= size {

403

if undecided.len() <= size {

406

return Ok((undecided.clone(), size));

404

return Ok((undecided.clone(), size));

407

}

405

}

408

}

406

}

409

407

410

self.ensure_children_cache()?;

408

self.ensure_children_cache()?;

411

let revs = self.undecided.as_ref().unwrap();

409

let revs = self.undecided.as_ref().unwrap();

412

let mut sample: HashSet<Revision> = revs.clone();

410

let mut sample: HashSet<Revision> = revs.clone();

413

411

414

// it's possible that leveraging the children cache would be more

412

// it's possible that leveraging the children cache would be more

415

// efficient here

413

// efficient here

416

dagops::retain_heads(&self.graph, &mut sample)?;

414

dagops::retain_heads(&self.graph, &mut sample)?;

417

let revsheads = sample.clone(); // was again heads(revs) in python

415

let revsheads = sample.clone(); // was again heads(revs) in python

418

416

419

// update from heads

417

// update from heads

420

update_sample(

418

update_sample(

421

Some(revs),

419

Some(revs),

422

revsheads.iter().cloned(),

420

revsheads.iter().cloned(),

423

&mut sample,

421

&mut sample,

424

|r| ParentsIterator::graph_parents(&self.graph, r),

422

|r| ParentsIterator::graph_parents(&self.graph, r),

425

None,

423

None,

426

)?;

424

)?;

427

425

428

// update from roots

426

// update from roots

429

let revroots: HashSet<Revision> =

427

let revroots: HashSet<Revision> =

430

dagops::roots(&self.graph, revs)?.into_iter().collect();

428

dagops::roots(&self.graph, revs)?.into_iter().collect();

431

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

429

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

432

430

433

let children = self.children_cache.as_ref().unwrap();

431

let children = self.children_cache.as_ref().unwrap();

434

let empty_vec: Vec<Revision> = Vec::new();

432

let empty_vec: Vec<Revision> = Vec::new();

435

update_sample(

433

update_sample(

436

Some(revs),

434

Some(revs),

437

revroots,

435

revroots,

438

&mut sample,

436

&mut sample,

439

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

437

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

440

None,

438

None,

441

)?;

439

)?;

442

Ok((sample, prescribed_size))

440

Ok((sample, prescribed_size))

443

}

441

}

444

442

445

/// Fill up sample up to the wished size with random undecided Revisions.

443

/// Fill up sample up to the wished size with random undecided Revisions.

446

///

444

///

447

/// This is intended to be used as a last resort completion if the

445

/// This is intended to be used as a last resort completion if the

448

/// regular sampling algorithm returns too few elements.

446

/// regular sampling algorithm returns too few elements.

449

fn random_complete_sample(

447

fn random_complete_sample(

450

&mut self,

448

&mut self,

451

sample: &mut Vec<Revision>,

449

sample: &mut Vec<Revision>,

452

size: usize,

450

size: usize,

453

) {

451

) {

454

let sample_len = sample.len();

452

let sample_len = sample.len();

455

if size <= sample_len {

453

if size <= sample_len {

456

return;

454

return;

457

}

455

}

458

let take_from: Vec<Revision> = self

456

let take_from: Vec<Revision> = self

459

.undecided

457

.undecided

460

.as_ref()

458

.as_ref()

461

.unwrap()

459

.unwrap()

462

.iter()

460

.iter()

463

.filter(|&r| !sample.contains(r))

461

.filter(|&r| !sample.contains(r))

464

.cloned()

462

.cloned()

465

.collect();

463

.collect();

466

sample.extend(self.limit_sample(take_from, size - sample_len));

464

sample.extend(self.limit_sample(take_from, size - sample_len));

467

}

465

}

468

466

469

pub fn take_full_sample(

467

pub fn take_full_sample(

470

&mut self,

468

&mut self,

471

size: usize,

469

size: usize,

472

) -> Result<Vec<Revision>, GraphError> {

470

) -> Result<Vec<Revision>, GraphError> {

473

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

471

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

474

let size = if self.respect_size {

472

let size = if self.respect_size {

475

size

473

size

476

} else {

474

} else {

477

prescribed_size

475

prescribed_size

478

};

476

};

479

let mut sample =

477

let mut sample =

480

self.limit_sample(sample_set.into_iter().collect(), size);

478

self.limit_sample(sample_set.into_iter().collect(), size);

481

self.random_complete_sample(&mut sample, size);

479

self.random_complete_sample(&mut sample, size);

482

Ok(sample)

480

Ok(sample)

483

}

481

}

484

}

482

}

485

483

486

#[cfg(test)]

484

#[cfg(test)]

487

mod tests {

485

mod tests {

488

use super::*;

486

use super::*;

489

use crate::testing::SampleGraph;

487

use crate::testing::SampleGraph;

490

488

491

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

489

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

492

///

490

///

493

/// To avoid actual randomness in these tests, we give it a fixed

491

/// To avoid actual randomness in these tests, we give it a fixed

494

/// random seed, but by default we'll test the random version.

492

/// random seed, but by default we'll test the random version.

495

fn full_disco() -> PartialDiscovery<SampleGraph> {

493

fn full_disco() -> PartialDiscovery<SampleGraph> {

496

PartialDiscovery::new_with_seed(

494

PartialDiscovery::new_with_seed(

497

SampleGraph,

495

SampleGraph,

498

vec![10, 11, 12, 13],

496

vec![10, 11, 12, 13],

499

[0; 16],

497

[0; 16],

500

true,

498

true,

501

true,

499

true,

502

)

500

)

503

}

501

}

504

502

505

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

503

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

506

///

504

///

507

/// To avoid actual randomness in tests, we give it a fixed random seed.

505

/// To avoid actual randomness in tests, we give it a fixed random seed.

508

fn disco12() -> PartialDiscovery<SampleGraph> {

506

fn disco12() -> PartialDiscovery<SampleGraph> {

509

PartialDiscovery::new_with_seed(

507

PartialDiscovery::new_with_seed(

510

SampleGraph,

508

SampleGraph,

511

vec![12],

509

vec![12],

512

[0; 16],

510

[0; 16],

513

true,

511

true,

514

true,

512

true,

515

)

513

)

516

}

514

}

517

515

518

fn sorted_undecided(

516

fn sorted_undecided(

519

disco: &PartialDiscovery<SampleGraph>,

517

disco: &PartialDiscovery<SampleGraph>,

520

) -> Vec<Revision> {

518

) -> Vec<Revision> {

521

let mut as_vec: Vec<Revision> =

519

let mut as_vec: Vec<Revision> =

522

disco.undecided.as_ref().unwrap().iter().cloned().collect();

520

disco.undecided.as_ref().unwrap().iter().cloned().collect();

523

as_vec.sort();

521

as_vec.sort();

524

as_vec

522

as_vec

525

}

523

}

526

524

527

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

525

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

528

let mut as_vec: Vec<Revision> =

526

let mut as_vec: Vec<Revision> =

529

disco.missing.iter().cloned().collect();

527

disco.missing.iter().cloned().collect();

530

as_vec.sort();

528

as_vec.sort();

531

as_vec

529

as_vec

532

}

530

}

533

531

534

fn sorted_common_heads(

532

fn sorted_common_heads(

535

disco: &PartialDiscovery<SampleGraph>,

533

disco: &PartialDiscovery<SampleGraph>,

536

) -> Result<Vec<Revision>, GraphError> {

534

) -> Result<Vec<Revision>, GraphError> {

537

let mut as_vec: Vec<Revision> =

535

let mut as_vec: Vec<Revision> =

538

disco.common_heads()?.iter().cloned().collect();

536

disco.common_heads()?.iter().cloned().collect();

539

as_vec.sort();

537

as_vec.sort();

540

Ok(as_vec)

538

Ok(as_vec)

541

}

539

}

542

540

543

#[test]

541

#[test]

544

fn test_add_common_get_undecided() -> Result<(), GraphError> {

542

fn test_add_common_get_undecided() -> Result<(), GraphError> {

545

let mut disco = full_disco();

543

let mut disco = full_disco();

546

assert_eq!(disco.undecided, None);

544

assert_eq!(disco.undecided, None);

547

assert!(!disco.has_info());

545

assert!(!disco.has_info());

548

assert_eq!(disco.stats().undecided, None);

546

assert_eq!(disco.stats().undecided, None);

549

547

550

disco.add_common_revisions(vec![11, 12])?;

548

disco.add_common_revisions(vec![11, 12])?;

551

assert!(disco.has_info());

549

assert!(disco.has_info());

552

assert!(!disco.is_complete());

550

assert!(!disco.is_complete());

553

assert!(disco.missing.is_empty());

551

assert!(disco.missing.is_empty());

554

552

555

// add_common_revisions did not trigger a premature computation

553

// add_common_revisions did not trigger a premature computation

556

// of `undecided`, let's check that and ask for them

554

// of `undecided`, let's check that and ask for them

557

assert_eq!(disco.undecided, None);

555

assert_eq!(disco.undecided, None);

558

disco.ensure_undecided()?;

556

disco.ensure_undecided()?;

559

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

557

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

560

assert_eq!(disco.stats().undecided, Some(4));

558

assert_eq!(disco.stats().undecided, Some(4));

561

Ok(())

559

Ok(())

562

}

560

}

563

561

564

/// in this test, we pretend that our peer misses exactly (8+10)::

562

/// in this test, we pretend that our peer misses exactly (8+10)::

565

/// and we're comparing all our repo to it (as in a bare push)

563

/// and we're comparing all our repo to it (as in a bare push)

566

#[test]

564

#[test]

567

fn test_discovery() -> Result<(), GraphError> {

565

fn test_discovery() -> Result<(), GraphError> {

568

let mut disco = full_disco();

566

let mut disco = full_disco();

569

disco.add_common_revisions(vec![11, 12])?;

567

disco.add_common_revisions(vec![11, 12])?;

570

disco.add_missing_revisions(vec![8, 10])?;

568

disco.add_missing_revisions(vec![8, 10])?;

571

assert_eq!(sorted_undecided(&disco), vec![5]);

569

assert_eq!(sorted_undecided(&disco), vec![5]);

572

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

570

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

573

assert!(!disco.is_complete());

571

assert!(!disco.is_complete());

574

572

575

disco.add_common_revisions(vec![5])?;

573

disco.add_common_revisions(vec![5])?;

576

assert_eq!(sorted_undecided(&disco), vec![]);

574

assert_eq!(sorted_undecided(&disco), vec![]);

577

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

575

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

578

assert!(disco.is_complete());

576

assert!(disco.is_complete());

579

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

577

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

580

Ok(())

578

Ok(())

581

}

579

}

582

580

583

#[test]

581

#[test]

584

fn test_add_missing_early_continue() -> Result<(), GraphError> {

582

fn test_add_missing_early_continue() -> Result<(), GraphError> {

585

eprintln!("test_add_missing_early_stop");

583

eprintln!("test_add_missing_early_stop");

586

let mut disco = full_disco();

584

let mut disco = full_disco();

587

disco.add_common_revisions(vec![13, 3, 4])?;

585

disco.add_common_revisions(vec![13, 3, 4])?;

588

disco.ensure_children_cache()?;

586

disco.ensure_children_cache()?;

589

// 12 is grand-child of 6 through 9

587

// 12 is grand-child of 6 through 9

590

// passing them in this order maximizes the chances of the

588

// passing them in this order maximizes the chances of the

591

// early continue to do the wrong thing

589

// early continue to do the wrong thing

592

disco.add_missing_revisions(vec![6, 9, 12])?;

590

disco.add_missing_revisions(vec![6, 9, 12])?;

593

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

591

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

594

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

592

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

595

assert!(!disco.is_complete());

593

assert!(!disco.is_complete());

596

Ok(())

594

Ok(())

597

}

595

}

598

596

599

#[test]

597

#[test]

600

fn test_limit_sample_no_need_to() {

598

fn test_limit_sample_no_need_to() {

601

let sample = vec![1, 2, 3, 4];

599

let sample = vec![1, 2, 3, 4];

602

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

600

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

603

}

601

}

604

602

605

#[test]

603

#[test]

606

fn test_limit_sample_less_than_half() {

604

fn test_limit_sample_less_than_half() {

607

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

605

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

608

}

606

}

609

607

610

#[test]

608

#[test]

611

fn test_limit_sample_more_than_half() {

609

fn test_limit_sample_more_than_half() {

612

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

610

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

613

}

611

}

614

612

615

#[test]

613

#[test]

616

fn test_limit_sample_no_random() {

614

fn test_limit_sample_no_random() {

617

let mut disco = full_disco();

615

let mut disco = full_disco();

618

disco.randomize = false;

616

disco.randomize = false;

619

assert_eq!(

617

assert_eq!(

620

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

618

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

621

vec![1, 3, 5, 7]

619

vec![1, 3, 5, 7]

622

);

620

);

623

}

621

}

624

622

625

#[test]

623

#[test]

626

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

624

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

627

let mut disco = full_disco();

625

let mut disco = full_disco();

628

disco.undecided = Some((1..=13).collect());

626

disco.undecided = Some((1..=13).collect());

629

627

630

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

628

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

631

sample_vec.sort();

629

sample_vec.sort();

632

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

630

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

633

Ok(())

631

Ok(())

634

}

632

}

635

633

636

#[test]

634

#[test]

637

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

635

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

638

let mut disco = disco12();

636

let mut disco = disco12();

639

disco.ensure_undecided()?;

637

disco.ensure_undecided()?;

640

638

641

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

639

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

642

sample_vec.sort();

640

sample_vec.sort();

643

// r12's only parent is r9, whose unique grand-parent through the

641

// r12's only parent is r9, whose unique grand-parent through the

644

// diamond shape is r4. This ends there because the distance from r4

642

// diamond shape is r4. This ends there because the distance from r4

645

// to the root is only 3.

643

// to the root is only 3.

646

assert_eq!(sample_vec, vec![4, 9, 12]);

644

assert_eq!(sample_vec, vec![4, 9, 12]);

647

Ok(())

645

Ok(())

648

}

646

}

649

647

650

#[test]

648

#[test]

651

fn test_children_cache() -> Result<(), GraphError> {

649

fn test_children_cache() -> Result<(), GraphError> {

652

let mut disco = full_disco();

650

let mut disco = full_disco();

653

disco.ensure_children_cache()?;

651

disco.ensure_children_cache()?;

654

652

655

let cache = disco.children_cache.unwrap();

653

let cache = disco.children_cache.unwrap();

656

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

654

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

657

assert_eq!(cache.get(&10).cloned(), None);

655

assert_eq!(cache.get(&10).cloned(), None);

658

656

659

let mut children_4 = cache.get(&4).cloned().unwrap();

657

let mut children_4 = cache.get(&4).cloned().unwrap();

660

children_4.sort();

658

children_4.sort();

661

assert_eq!(children_4, vec![5, 6, 7]);

659

assert_eq!(children_4, vec![5, 6, 7]);

662

660

663

let mut children_7 = cache.get(&7).cloned().unwrap();

661

let mut children_7 = cache.get(&7).cloned().unwrap();

664

children_7.sort();

662

children_7.sort();

665

assert_eq!(children_7, vec![9, 11]);

663

assert_eq!(children_7, vec![9, 11]);

666

664

667

Ok(())

665

Ok(())

668

}

666

}

669

667

670

#[test]

668

#[test]

671

fn test_complete_sample() {

669

fn test_complete_sample() {

672

let mut disco = full_disco();

670

let mut disco = full_disco();

673

let undecided: HashSet<Revision> =

671

let undecided: HashSet<Revision> =

674

[4, 7, 9, 2, 3].iter().cloned().collect();

672

[4, 7, 9, 2, 3].iter().cloned().collect();

675

disco.undecided = Some(undecided);

673

disco.undecided = Some(undecided);

676

674

677

let mut sample = vec![0];

675

let mut sample = vec![0];

678

disco.random_complete_sample(&mut sample, 3);

676

disco.random_complete_sample(&mut sample, 3);

679

assert_eq!(sample.len(), 3);

677

assert_eq!(sample.len(), 3);

680

678

681

let mut sample = vec![2, 4, 7];

679

let mut sample = vec![2, 4, 7];

682

disco.random_complete_sample(&mut sample, 1);

680

disco.random_complete_sample(&mut sample, 1);

683

assert_eq!(sample.len(), 3);

681

assert_eq!(sample.len(), 3);

684

}

682

}

685

683

686

#[test]

684

#[test]

687

fn test_bidirectional_sample() -> Result<(), GraphError> {

685

fn test_bidirectional_sample() -> Result<(), GraphError> {

688

let mut disco = full_disco();

686

let mut disco = full_disco();

689

disco.undecided = Some((0..=13).into_iter().collect());

687

disco.undecided = Some((0..=13).into_iter().collect());

690

688

691

let (sample_set, size) = disco.bidirectional_sample(7)?;

689

let (sample_set, size) = disco.bidirectional_sample(7)?;

692

assert_eq!(size, 7);

690

assert_eq!(size, 7);

693

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

691

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

694

sample.sort();

692

sample.sort();

695

// our DAG is a bit too small for the results to be really interesting

693

// our DAG is a bit too small for the results to be really interesting

696

// at least it shows that

694

// at least it shows that

697

// - we went both ways

695

// - we went both ways

698

// - we didn't take all Revisions (6 is not in the sample)

696

// - we didn't take all Revisions (6 is not in the sample)

699

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

697

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

700

Ok(())

698

Ok(())

701

}

699

}

702

700

703

}

701

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             // discovery.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Discovery operations
             //!
             //! This is a Rust counterpart to the `partialdiscovery` class of
             //! `mercurial.setdiscovery`
-            extern crate rand;
-            extern crate rand_pcg;
-            use self::rand::seq::SliceRandom;
-            use self::rand::{thread_rng, RngCore, SeedableRng};
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::ancestors::MissingAncestors;
             use crate::dagops;
+            use rand::seq::SliceRandom;
+            use rand::{thread_rng, RngCore, SeedableRng};
             use std::cmp::{max, min};
             use std::collections::{HashMap, HashSet, VecDeque};
-            type Rng = self::rand_pcg::Pcg32;
+            type Rng = rand_pcg::Pcg32;
             pub struct PartialDiscovery<G: Graph + Clone> {
                 target_heads: Option<Vec<Revision>>,
                 graph: G, // plays the role of self._repo
                 common: MissingAncestors<G>,
                 undecided: Option<HashSet<Revision>>,
                 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
                 missing: HashSet<Revision>,
                 rng: Rng,
                 respect_size: bool,
                 randomize: bool,
             }
             pub struct DiscoveryStats {
                 pub undecided: Option<usize>,
             }
             /// Update an existing sample to match the expected size
             ///
             /// The sample is updated with revisions exponentially distant from each
             /// element of `heads`.
             ///
             /// If a target size is specified, the sampling will stop once this size is
             /// reached. Otherwise sampling will happen until roots of the <revs> set are
             /// reached.
             ///
             /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
             ///   represented by `parentfn`
             /// - `heads`: set of DAG head revs
             /// - `sample`: a sample to update
             /// - `parentfn`: a callable to resolve parents for a revision
             /// - `quicksamplesize`: optional target size of the sample
             fn update_sample<I>(
                 revs: Option<&HashSet<Revision>>,
                 heads: impl IntoIterator<Item = Revision>,
                 sample: &mut HashSet<Revision>,
                 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
                 quicksamplesize: Option<usize>,
             ) -> Result<(), GraphError>
             where
                 I: Iterator<Item = Revision>,
             {
                 let mut distances: HashMap<Revision, u32> = HashMap::new();
                 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
                 let mut factor: u32 = 1;
                 let mut seen: HashSet<Revision> = HashSet::new();
                 loop {
                     let current = match visit.pop_front() {
                         None => {
                             break;
                         }
                         Some(r) => r,
                     };
                     if !seen.insert(current) {
                         continue;
                     }
                     let d = *distances.entry(current).or_insert(1);
                     if d > factor {
                         factor *= 2;
                     }
                     if d == factor {
                         sample.insert(current);
                         if let Some(sz) = quicksamplesize {
                             if sample.len() >= sz {
                                 return Ok(());
                             }
                         }
                     }
                     for p in parentsfn(current)? {
                         if let Some(revs) = revs {
                             if !revs.contains(&p) {
                                 continue;
                             }
                         }
                         distances.entry(p).or_insert(d + 1);
                         visit.push_back(p);
                     }
                 }
                 Ok(())
             }
             struct ParentsIterator {
                 parents: [Revision; 2],
                 cur: usize,
             }
             impl ParentsIterator {
                 fn graph_parents(
                     graph: &impl Graph,
                     r: Revision,
                 ) -> Result<ParentsIterator, GraphError> {
                     Ok(ParentsIterator {
                         parents: graph.parents(r)?,
                         cur: 0,
                     })
                 }
             }
             impl Iterator for ParentsIterator {
                 type Item = Revision;
                 fn next(&mut self) -> Option<Revision> {
                     if self.cur > 1 {
                         return None;
                     }
                     let rev = self.parents[self.cur];
                     self.cur += 1;
                     if rev == NULL_REVISION {
                         return self.next();
                     }
                     Some(rev)
                 }
             }
             impl<G: Graph + Clone> PartialDiscovery<G> {
                 /// Create a PartialDiscovery object, with the intent
                 /// of comparing our `::<target_heads>` revset to the contents of another
                 /// repo.
                 ///
                 /// For now `target_heads` is passed as a vector, and will be used
                 /// at the first call to `ensure_undecided()`.
                 ///
                 /// If we want to make the signature more flexible,
                 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
                 /// object since we'll keep it in the meanwhile
                 ///
                 /// The `respect_size` boolean controls how the sampling methods
                 /// will interpret the size argument requested by the caller. If it's
                 /// `false`, they are allowed to produce a sample whose size is more
                 /// appropriate to the situation (typically bigger).
                 ///
                 /// The `randomize` boolean affects sampling, and specifically how
                 /// limiting or last-minute expanding is been done:
                 ///
                 /// If `true`, both will perform random picking from `self.undecided`.
                 /// This is currently the best for actual discoveries.
                 ///
                 /// If `false`, a reproductible picking strategy is performed. This is
                 /// useful for integration tests.
                 pub fn new(
                     graph: G,
                     target_heads: Vec<Revision>,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     let mut seed: [u8; 16] = [0; 16];
                     if randomize {
                         thread_rng().fill_bytes(&mut seed);
                     }
                     Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
                 }
                 pub fn new_with_seed(
                     graph: G,
                     target_heads: Vec<Revision>,
                     seed: [u8; 16],
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     PartialDiscovery {
                         undecided: None,
                         children_cache: None,
                         target_heads: Some(target_heads),
                         graph: graph.clone(),
                         common: MissingAncestors::new(graph, vec![]),
                         missing: HashSet::new(),
                         rng: Rng::from_seed(seed),
                         respect_size: respect_size,
                         randomize: randomize,
                     }
                 }
                 /// Extract at most `size` random elements from sample and return them
                 /// as a vector
                 fn limit_sample(
                     &mut self,
                     mut sample: Vec<Revision>,
                     size: usize,
                 ) -> Vec<Revision> {
                     if !self.randomize {
                         sample.sort();
                         sample.truncate(size);
                         return sample;
                     }
                     let sample_len = sample.len();
                     if sample_len <= size {
                         return sample;
                     }
                     let rng = &mut self.rng;
                     let dropped_size = sample_len - size;
                     let limited_slice = if size < dropped_size {
                         sample.partial_shuffle(rng, size).0
                     } else {
                         sample.partial_shuffle(rng, dropped_size).1
                     };
                     limited_slice.to_owned()
                 }
                 /// Register revisions known as being common
                 pub fn add_common_revisions(
                     &mut self,
                     common: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let before_len = self.common.get_bases().len();
                     self.common.add_bases(common);
                     if self.common.get_bases().len() == before_len {
                         return Ok(());
                     }
                     if let Some(ref mut undecided) = self.undecided {
                         self.common.remove_ancestors_from(undecided)?;
                     }
                     Ok(())
                 }
                 /// Register revisions known as being missing
                 ///
                 /// # Performance note
                 ///
                 /// Except in the most trivial case, the first call of this method has
                 /// the side effect of computing `self.undecided` set for the first time,
                 /// and the related caches it might need for efficiency of its internal
                 /// computation. This is typically faster if more information is
                 /// available in `self.common`. Therefore, for good performance, the
                 /// caller should avoid calling this too early.
                 pub fn add_missing_revisions(
                     &mut self,
                     missing: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
                     if tovisit.is_empty() {
                         return Ok(());
                     }
                     self.ensure_children_cache()?;
                     self.ensure_undecided()?; // for safety of possible future refactors
                     let children = self.children_cache.as_ref().unwrap();
                     let mut seen: HashSet<Revision> = HashSet::new();
                     let undecided_mut = self.undecided.as_mut().unwrap();
                     while let Some(rev) = tovisit.pop_front() {
                         if !self.missing.insert(rev) {
                             // either it's known to be missing from a previous
                             // invocation, and there's no need to iterate on its
                             // children (we now they are all missing)
                             // or it's from a previous iteration of this loop
                             // and its children have already been queued
                             continue;
                         }
                         undecided_mut.remove(&rev);
                         match children.get(&rev) {
                             None => {
                                 continue;
                             }
                             Some(this_children) => {
                                 for child in this_children.iter().cloned() {
                                     if seen.insert(child) {
                                         tovisit.push_back(child);
                                     }
                                 }
                             }
                         }
                     }
                     Ok(())
                 }
                 /// Do we have any information about the peer?
                 pub fn has_info(&self) -> bool {
                     self.common.has_bases()
                 }
                 /// Did we acquire full knowledge of our Revisions that the peer has?
                 pub fn is_complete(&self) -> bool {
                     self.undecided.as_ref().map_or(false, |s| s.is_empty())
                 }
                 /// Return the heads of the currently known common set of revisions.
                 ///
                 /// If the discovery process is not complete (see `is_complete()`), the
                 /// caller must be aware that this is an intermediate state.
                 ///
                 /// On the other hand, if it is complete, then this is currently
                 /// the only way to retrieve the end results of the discovery process.
                 ///
                 /// We may introduce in the future an `into_common_heads` call that
                 /// would be more appropriate for normal Rust callers, dropping `self`
                 /// if it is complete.
                 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     self.common.bases_heads()
                 }
                 /// Force first computation of `self.undecided`
                 ///
                 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
                 /// unwrapped to get workable immutable or mutable references without
                 /// any panic.
                 ///
                 /// This is an imperative call instead of an access with added lazyness
                 /// to reduce easily the scope of mutable borrow for the caller,
                 /// compared to undecided(&'a mut self) -> &'a… that would keep it
                 /// as long as the resulting immutable one.
                 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
                     if self.undecided.is_some() {
                         return Ok(());
                     }
                     let tgt = self.target_heads.take().unwrap();
                     self.undecided =
                         Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
                     Ok(())
                 }
                 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
                     if self.children_cache.is_some() {
                         return Ok(());
                     }
                     self.ensure_undecided()?;
                     let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
                     for &rev in self.undecided.as_ref().unwrap() {
                         for p in ParentsIterator::graph_parents(&self.graph, rev)? {
                             children.entry(p).or_insert_with(|| Vec::new()).push(rev);
                         }
                     }
                     self.children_cache = Some(children);
                     Ok(())
                 }
                 /// Provide statistics about the current state of the discovery process
                 pub fn stats(&self) -> DiscoveryStats {
                     DiscoveryStats {
                         undecided: self.undecided.as_ref().map(|s| s.len()),
                     }
                 }
                 pub fn take_quick_sample(
                     &mut self,
                     headrevs: impl IntoIterator<Item = Revision>,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     self.ensure_undecided()?;
                     let mut sample = {
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok(undecided.iter().cloned().collect());
                         }
                         dagops::heads(&self.graph, undecided.iter())?
                     };
                     if sample.len() >= size {
                         return Ok(self.limit_sample(sample.into_iter().collect(), size));
                     }
                     update_sample(
                         None,
                         headrevs,
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         Some(size),
                     )?;
                     Ok(sample.into_iter().collect())
                 }
                 /// Extract a sample from `self.undecided`, going from its heads and roots.
                 ///
                 /// The `size` parameter is used to avoid useless computations if
                 /// it turns out to be bigger than the whole set of undecided Revisions.
                 ///
                 /// The sample is taken by using `update_sample` from the heads, then
                 /// from the roots, working on the reverse DAG,
                 /// expressed by `self.children_cache`.
                 ///
                 /// No effort is being made to complete or limit the sample to `size`
                 /// but this method returns another interesting size that it derives
                 /// from its knowledge of the structure of the various sets, leaving
                 /// to the caller the decision to use it or not.
                 fn bidirectional_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<(HashSet<Revision>, usize), GraphError> {
                     self.ensure_undecided()?;
                     {
                         // we don't want to compute children_cache before this
                         // but doing it after extracting self.undecided takes a mutable
                         // ref to self while a shareable one is still active.
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok((undecided.clone(), size));
                         }
                     }
                     self.ensure_children_cache()?;
                     let revs = self.undecided.as_ref().unwrap();
                     let mut sample: HashSet<Revision> = revs.clone();
                     // it's possible that leveraging the children cache would be more
                     // efficient here
                     dagops::retain_heads(&self.graph, &mut sample)?;
                     let revsheads = sample.clone(); // was again heads(revs) in python
                     // update from heads
                     update_sample(
                         Some(revs),
                         revsheads.iter().cloned(),
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         None,
                     )?;
                     // update from roots
                     let revroots: HashSet<Revision> =
                         dagops::roots(&self.graph, revs)?.into_iter().collect();
                     let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
                     let children = self.children_cache.as_ref().unwrap();
                     let empty_vec: Vec<Revision> = Vec::new();
                     update_sample(
                         Some(revs),
                         revroots,
                         &mut sample,
                         |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
                         None,
                     )?;
                     Ok((sample, prescribed_size))
                 }
                 /// Fill up sample up to the wished size with random undecided Revisions.
                 ///
                 /// This is intended to be used as a last resort completion if the
                 /// regular sampling algorithm returns too few elements.
                 fn random_complete_sample(
                     &mut self,
                     sample: &mut Vec<Revision>,
                     size: usize,
                 ) {
                     let sample_len = sample.len();
                     if size <= sample_len {
                         return;
                     }
                     let take_from: Vec<Revision> = self
                         .undecided
                         .as_ref()
                         .unwrap()
                         .iter()
                         .filter(|&r| !sample.contains(r))
                         .cloned()
                         .collect();
                     sample.extend(self.limit_sample(take_from, size - sample_len));
                 }
                 pub fn take_full_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
                     let size = if self.respect_size {
                         size
                     } else {
                         prescribed_size
                     };
                     let mut sample =
                         self.limit_sample(sample_set.into_iter().collect(), size);
                     self.random_complete_sample(&mut sample, size);
                     Ok(sample)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;
                 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
                 ///
                 /// To avoid actual randomness in these tests, we give it a fixed
                 /// random seed, but by default we'll test the random version.
                 fn full_disco() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![10, 11, 12, 13],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
                 ///
                 /// To avoid actual randomness in tests, we give it a fixed random seed.
                 fn disco12() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![12],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 fn sorted_undecided(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.undecided.as_ref().unwrap().iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.missing.iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_common_heads(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut as_vec: Vec<Revision> =
                         disco.common_heads()?.iter().cloned().collect();
                     as_vec.sort();
                     Ok(as_vec)
                 }
                 #[test]
                 fn test_add_common_get_undecided() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     assert_eq!(disco.undecided, None);
                     assert!(!disco.has_info());
                     assert_eq!(disco.stats().undecided, None);
                     disco.add_common_revisions(vec![11, 12])?;
                     assert!(disco.has_info());
                     assert!(!disco.is_complete());
                     assert!(disco.missing.is_empty());
                     // add_common_revisions did not trigger a premature computation
                     // of `undecided`, let's check that and ask for them
                     assert_eq!(disco.undecided, None);
                     disco.ensure_undecided()?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
                     assert_eq!(disco.stats().undecided, Some(4));
                     Ok(())
                 }
                 /// in this test, we pretend that our peer misses exactly (8+10)::
                 /// and we're comparing all our repo to it (as in a bare push)
                 #[test]
                 fn test_discovery() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![11, 12])?;
                     disco.add_missing_revisions(vec![8, 10])?;
                     assert_eq!(sorted_undecided(&disco), vec![5]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(!disco.is_complete());
                     disco.add_common_revisions(vec![5])?;
                     assert_eq!(sorted_undecided(&disco), vec![]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(disco.is_complete());
                     assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_add_missing_early_continue() -> Result<(), GraphError> {
                     eprintln!("test_add_missing_early_stop");
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![13, 3, 4])?;
                     disco.ensure_children_cache()?;
                     // 12 is grand-child of 6 through 9
                     // passing them in this order maximizes the chances of the
                     // early continue to do the wrong thing
                     disco.add_missing_revisions(vec![6, 9, 12])?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
                     assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
                     assert!(!disco.is_complete());
                     Ok(())
                 }
                 #[test]
                 fn test_limit_sample_no_need_to() {
                     let sample = vec![1, 2, 3, 4];
                     assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
                 }
                 #[test]
                 fn test_limit_sample_less_than_half() {
                     assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
                 }
                 #[test]
                 fn test_limit_sample_more_than_half() {
                     assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
                 }
                 #[test]
                 fn test_limit_sample_no_random() {
                     let mut disco = full_disco();
                     disco.randomize = false;
                     assert_eq!(
                         disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
                         vec![1, 3, 5, 7]
                     );
                 }
                 #[test]
                 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((1..=13).collect());
                     let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
                     sample_vec.sort();
                     assert_eq!(sample_vec, vec![10, 11, 12, 13]);
                     Ok(())
                 }
                 #[test]
                 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
                     let mut disco = disco12();
                     disco.ensure_undecided()?;
                     let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
                     sample_vec.sort();
                     // r12's only parent is r9, whose unique grand-parent through the
                     // diamond shape is r4. This ends there because the distance from r4
                     // to the root is only 3.
                     assert_eq!(sample_vec, vec![4, 9, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_children_cache() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.ensure_children_cache()?;
                     let cache = disco.children_cache.unwrap();
                     assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
                     assert_eq!(cache.get(&10).cloned(), None);
                     let mut children_4 = cache.get(&4).cloned().unwrap();
                     children_4.sort();
                     assert_eq!(children_4, vec![5, 6, 7]);
                     let mut children_7 = cache.get(&7).cloned().unwrap();
                     children_7.sort();
                     assert_eq!(children_7, vec![9, 11]);
                     Ok(())
                 }
                 #[test]
                 fn test_complete_sample() {
                     let mut disco = full_disco();
                     let undecided: HashSet<Revision> =
                         [4, 7, 9, 2, 3].iter().cloned().collect();
                     disco.undecided = Some(undecided);
                     let mut sample = vec![0];
                     disco.random_complete_sample(&mut sample, 3);
                     assert_eq!(sample.len(), 3);
                     let mut sample = vec![2, 4, 7];
                     disco.random_complete_sample(&mut sample, 1);
                     assert_eq!(sample.len(), 3);
                 }
                 #[test]
                 fn test_bidirectional_sample() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((0..=13).into_iter().collect());
                     let (sample_set, size) = disco.bidirectional_sample(7)?;
                     assert_eq!(size, 7);
                     let mut sample: Vec<Revision> = sample_set.into_iter().collect();
                     sample.sort();
                     // our DAG is a bit too small for the results to be really interesting
                     // at least it shows that
                     // - we went both ways
                     // - we didn't take all Revisions (6 is not in the sample)
                     assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
                     Ok(())
                 }
             }