upstream/mercurial-mirror Commit - r42970:8c9a6ade

1

// discovery.rs

1

// discovery.rs

2

//

2

//

3

4

//

4

//

5

// This software may be used and distributed according to the terms of the

5

// This software may be used and distributed according to the terms of the

6

// GNU General Public License version 2 or any later version.

6

// GNU General Public License version 2 or any later version.

7

8

//! Discovery operations

8

//! Discovery operations

9

//!

9

//!

10

//! This is a Rust counterpart to the `partialdiscovery` class of

10

//! This is a Rust counterpart to the `partialdiscovery` class of

11

//! `mercurial.setdiscovery`

11

//! `mercurial.setdiscovery`

12

13

extern crate rand;

13

extern crate rand;

14

extern crate rand_pcg;

14

extern crate rand_pcg;

15

use self::rand::seq::SliceRandom;

15

use self::rand::seq::SliceRandom;

16

use self::rand::{thread_rng, RngCore, SeedableRng};

16

use self::rand::{thread_rng, RngCore, SeedableRng};

17

use super::{Graph, GraphError, Revision, NULL_REVISION};

17

use super::{Graph, GraphError, Revision, NULL_REVISION};

18

use crate::ancestors::MissingAncestors;

18

use crate::ancestors::MissingAncestors;

19

use crate::dagops;

19

use crate::dagops;

20

use std::cmp::{max, min};

20

use std::cmp::{max, min};

21

use std::collections::{HashMap, HashSet, VecDeque};

21

use std::collections::{HashMap, HashSet, VecDeque};

22

23

type Rng = self::rand_pcg::Pcg32;

23

type Rng = self::rand_pcg::Pcg32;

24

25

pub struct PartialDiscovery<G: Graph + Clone> {

25

pub struct PartialDiscovery<G: Graph + Clone> {

26

target_heads: Option<Vec<Revision>>,

26

target_heads: Option<Vec<Revision>>,

27

graph: G, // plays the role of self._repo

27

graph: G, // plays the role of self._repo

28

common: MissingAncestors<G>,

28

common: MissingAncestors<G>,

29

undecided: Option<HashSet<Revision>>,

29

undecided: Option<HashSet<Revision>>,

30

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

30

children_cache: Option<HashMap<Revision, Vec<Revision>>>,

31

missing: HashSet<Revision>,

31

missing: HashSet<Revision>,

32

rng: Rng,

32

rng: Rng,

33

respect_size: bool,

33

respect_size: bool,

34

randomize: bool,

34

randomize: bool,

35

}

35

}

36

37

pub struct DiscoveryStats {

37

pub struct DiscoveryStats {

38

pub undecided: Option<usize>,

38

pub undecided: Option<usize>,

39

}

39

}

40

41

/// Update an existing sample to match the expected size

41

/// Update an existing sample to match the expected size

42

///

42

///

43

/// The sample is updated with revisions exponentially distant from each

43

/// The sample is updated with revisions exponentially distant from each

44

/// element of `heads`.

44

/// element of `heads`.

45

///

45

///

46

/// If a target size is specified, the sampling will stop once this size is

46

/// If a target size is specified, the sampling will stop once this size is

47

/// reached. Otherwise sampling will happen until roots of the <revs> set are

47

/// reached. Otherwise sampling will happen until roots of the <revs> set are

48

/// reached.

48

/// reached.

49

///

49

///

50

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

50

/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag

51

/// represented by `parentfn`

51

/// represented by `parentfn`

52

/// - `heads`: set of DAG head revs

52

/// - `heads`: set of DAG head revs

53

/// - `sample`: a sample to update

53

/// - `sample`: a sample to update

54

/// - `parentfn`: a callable to resolve parents for a revision

54

/// - `parentfn`: a callable to resolve parents for a revision

55

/// - `quicksamplesize`: optional target size of the sample

55

/// - `quicksamplesize`: optional target size of the sample

56

fn update_sample<I>(

56

fn update_sample<I>(

57

revs: Option<&HashSet<Revision>>,

57

revs: Option<&HashSet<Revision>>,

58

heads: impl IntoIterator<Item = Revision>,

58

heads: impl IntoIterator<Item = Revision>,

59

sample: &mut HashSet<Revision>,

59

sample: &mut HashSet<Revision>,

60

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

60

parentsfn: impl Fn(Revision) -> Result<I, GraphError>,

61

quicksamplesize: Option<usize>,

61

quicksamplesize: Option<usize>,

62

) -> Result<(), GraphError>

62

) -> Result<(), GraphError>

63

where

63

where

64

I: Iterator<Item = Revision>,

64

I: Iterator<Item = Revision>,

65

{

65

{

66

let mut distances: HashMap<Revision, u32> = HashMap::new();

66

let mut distances: HashMap<Revision, u32> = HashMap::new();

67

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

67

let mut visit: VecDeque<Revision> = heads.into_iter().collect();

68

let mut factor: u32 = 1;

68

let mut factor: u32 = 1;

69

let mut seen: HashSet<Revision> = HashSet::new();

69

let mut seen: HashSet<Revision> = HashSet::new();

70

loop {

70

loop {

71

let current = match visit.pop_front() {

71

let current = match visit.pop_front() {

72

None => {

72

None => {

73

break;

73

break;

74

}

74

}

75

Some(r) => r,

75

Some(r) => r,

76

};

76

};

77

if !seen.insert(current) {

77

if !seen.insert(current) {

78

continue;

78

continue;

79

}

79

}

80

81

let d = *distances.entry(current).or_insert(1);

81

let d = *distances.entry(current).or_insert(1);

82

if d > factor {

82

if d > factor {

83

factor *= 2;

83

factor *= 2;

84

}

84

}

85

if d == factor {

85

if d == factor {

86

sample.insert(current);

86

sample.insert(current);

87

if let Some(sz) = quicksamplesize {

87

if let Some(sz) = quicksamplesize {

88

if sample.len() >= sz {

88

if sample.len() >= sz {

89

return Ok(());

89

return Ok(());

90

}

90

}

91

}

91

}

92

}

92

}

93

for p in parentsfn(current)? {

93

for p in parentsfn(current)? {

94

if let Some(revs) = revs {

94

if let Some(revs) = revs {

95

if !revs.contains(&p) {

95

if !revs.contains(&p) {

96

continue;

96

continue;

97

}

97

}

98

}

98

}

99

distances.entry(p).or_insert(d + 1);

99

distances.entry(p).or_insert(d + 1);

100

visit.push_back(p);

100

visit.push_back(p);

101

}

101

}

102

}

102

}

103

Ok(())

103

Ok(())

104

}

104

}

105

106

struct ParentsIterator {

106

struct ParentsIterator {

107

parents: [Revision; 2],

107

parents: [Revision; 2],

108

cur: usize,

108

cur: usize,

109

}

109

}

110

111

impl ParentsIterator {

111

impl ParentsIterator {

112

fn graph_parents(

112

fn graph_parents(

113

graph: &impl Graph,

113

graph: &impl Graph,

114

r: Revision,

114

r: Revision,

115

) -> Result<ParentsIterator, GraphError> {

115

) -> Result<ParentsIterator, GraphError> {

116

Ok(ParentsIterator {

116

Ok(ParentsIterator {

117

parents: graph.parents(r)?,

117

parents: graph.parents(r)?,

118

cur: 0,

118

cur: 0,

119

})

119

})

120

}

120

}

121

}

121

}

122

123

impl Iterator for ParentsIterator {

123

impl Iterator for ParentsIterator {

124

type Item = Revision;

124

type Item = Revision;

125

126

fn next(&mut self) -> Option<Revision> {

126

fn next(&mut self) -> Option<Revision> {

127

if self.cur > 1 {

127

if self.cur > 1 {

128

return None;

128

return None;

129

}

129

}

130

let rev = self.parents[self.cur];

130

let rev = self.parents[self.cur];

131

self.cur += 1;

131

self.cur += 1;

132

if rev == NULL_REVISION {

132

if rev == NULL_REVISION {

133

return self.next();

133

return self.next();

134

}

134

}

135

Some(rev)

135

Some(rev)

136

}

136

}

137

}

137

}

138

139

impl<G: Graph + Clone> PartialDiscovery<G> {

139

impl<G: Graph + Clone> PartialDiscovery<G> {

140

/// Create a PartialDiscovery object, with the intent

140

/// Create a PartialDiscovery object, with the intent

141

/// of comparing our `::<target_heads>` revset to the contents of another

141

/// of comparing our `::<target_heads>` revset to the contents of another

142

/// repo.

142

/// repo.

143

///

143

///

144

/// For now `target_heads` is passed as a vector, and will be used

144

/// For now `target_heads` is passed as a vector, and will be used

145

/// at the first call to `ensure_undecided()`.

145

/// at the first call to `ensure_undecided()`.

146

///

146

///

147

/// If we want to make the signature more flexible,

147

/// If we want to make the signature more flexible,

148

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

148

/// we'll have to make it a type argument of `PartialDiscovery` or a trait

149

/// object since we'll keep it in the meanwhile

149

/// object since we'll keep it in the meanwhile

150

///

150

///

151

/// The `respect_size` boolean controls how the sampling methods

151

/// The `respect_size` boolean controls how the sampling methods

152

/// will interpret the size argument requested by the caller. If it's

152

/// will interpret the size argument requested by the caller. If it's

153

/// `false`, they are allowed to produce a sample whose size is more

153

/// `false`, they are allowed to produce a sample whose size is more

154

/// appropriate to the situation (typically bigger).

154

/// appropriate to the situation (typically bigger).

155

///

155

///

156

/// The `randomize` boolean affects sampling, and specifically how

156

/// The `randomize` boolean affects sampling, and specifically how

157

/// limiting or last-minute expanding is been done:

157

/// limiting or last-minute expanding is been done:

158

///

158

///

159

/// If `true`, both will perform random picking from `self.undecided`.

159

/// If `true`, both will perform random picking from `self.undecided`.

160

/// This is currently the best for actual discoveries.

160

/// This is currently the best for actual discoveries.

161

///

161

///

162

/// If `false`, a reproductible picking strategy is performed. This is

162

/// If `false`, a reproductible picking strategy is performed. This is

163

/// useful for integration tests.

163

/// useful for integration tests.

164

pub fn new(

164

pub fn new(

165

graph: G,

165

graph: G,

166

target_heads: Vec<Revision>,

166

target_heads: Vec<Revision>,

167

respect_size: bool,

167

respect_size: bool,

168

randomize: bool,

168

randomize: bool,

169

) -> Self {

169

) -> Self {

170

let mut seed: [u8; 16] = [0; 16];

170

let mut seed: [u8; 16] = [0; 16];

171

if randomize {

171

if randomize {

172

thread_rng().fill_bytes(&mut seed);

172

thread_rng().fill_bytes(&mut seed);

173

}

173

}

174

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

174

Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)

175

}

175

}

176

177

pub fn new_with_seed(

177

pub fn new_with_seed(

178

graph: G,

178

graph: G,

179

target_heads: Vec<Revision>,

179

target_heads: Vec<Revision>,

180

seed: [u8; 16],

180

seed: [u8; 16],

181

respect_size: bool,

181

respect_size: bool,

182

randomize: bool,

182

randomize: bool,

183

) -> Self {

183

) -> Self {

184

PartialDiscovery {

184

PartialDiscovery {

185

undecided: None,

185

undecided: None,

186

children_cache: None,

186

children_cache: None,

187

target_heads: Some(target_heads),

187

target_heads: Some(target_heads),

188

graph: graph.clone(),

188

graph: graph.clone(),

189

common: MissingAncestors::new(graph, vec![]),

189

common: MissingAncestors::new(graph, vec![]),

190

missing: HashSet::new(),

190

missing: HashSet::new(),

191

rng: Rng::from_seed(seed),

191

rng: Rng::from_seed(seed),

192

respect_size: respect_size,

192

respect_size: respect_size,

193

randomize: randomize,

193

randomize: randomize,

194

}

194

}

195

}

195

}

196

197

/// Extract at most `size` random elements from sample and return them

197

/// Extract at most `size` random elements from sample and return them

198

/// as a vector

198

/// as a vector

199

fn limit_sample(

199

fn limit_sample(

200

&mut self,

200

&mut self,

201

mut sample: Vec<Revision>,

201

mut sample: Vec<Revision>,

202

size: usize,

202

size: usize,

203

) -> Vec<Revision> {

203

) -> Vec<Revision> {

204

if !self.randomize {

204

if !self.randomize {

205

sample.sort();

205

sample.sort();

206

sample.truncate(size);

206

sample.truncate(size);

207

return sample;

207

return sample;

208

}

208

}

209

let sample_len = sample.len();

209

let sample_len = sample.len();

210

if sample_len <= size {

210

if sample_len <= size {

211

return sample;

211

return sample;

212

}

212

}

213

let rng = &mut self.rng;

213

let rng = &mut self.rng;

214

let dropped_size = sample_len - size;

214

let dropped_size = sample_len - size;

215

let limited_slice = if size < dropped_size {

215

let limited_slice = if size < dropped_size {

216

sample.partial_shuffle(rng, size).0

216

sample.partial_shuffle(rng, size).0

217

} else {

217

} else {

218

sample.partial_shuffle(rng, dropped_size).1

218

sample.partial_shuffle(rng, dropped_size).1

219

};

219

};

220

limited_slice.to_owned()

220

limited_slice.to_owned()

221

}

221

}

222

223

/// Register revisions known as being common

223

/// Register revisions known as being common

224

pub fn add_common_revisions(

224

pub fn add_common_revisions(

225

&mut self,

225

&mut self,

226

common: impl IntoIterator<Item = Revision>,

226

common: impl IntoIterator<Item = Revision>,

227

) -> Result<(), GraphError> {

227

) -> Result<(), GraphError> {

228

self.common.add_bases(common);

228

self.common.add_bases(common);

229

if let Some(ref mut undecided) = self.undecided {

229

if let Some(ref mut undecided) = self.undecided {

230

self.common.remove_ancestors_from(undecided)?;

230

self.common.remove_ancestors_from(undecided)?;

231

}

231

}

232

Ok(())

232

Ok(())

233

}

233

}

234

235

/// Register revisions known as being missing

235

/// Register revisions known as being missing

236

///

237

/// # Performance note

238

///

239

/// Except in the most trivial case, the first call of this method has

240

/// the side effect of computing `self.undecided` set for the first time,

241

/// and the related caches it might need for efficiency of its internal

242

/// computation. This is typically faster if more information is

243

/// available in `self.common`. Therefore, for good performance, the

244

/// caller should avoid calling this too early.

236

pub fn add_missing_revisions(

245

pub fn add_missing_revisions(

237

&mut self,

246

&mut self,

238

missing: impl IntoIterator<Item = Revision>,

247

missing: impl IntoIterator<Item = Revision>,

239

) -> Result<(), GraphError> {

248

) -> Result<(), GraphError> {

240

self.ensure_~~undecided~~()?;

249

self.ensure_children_cache()?;

241

let range = dagops::range(

250

self.ensure_undecided()?; // for safety of possible future refactors

242

&self.graph,

251

let children = self.children_cache.as_ref().unwrap();

243

missing,

252

let mut seen: HashSet<Revision> = HashSet::new();

244

self.undecided.as_ref().unwrap().iter().cloned(),

253

let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();

245

)?;

246

let undecided_mut = self.undecided.as_mut().unwrap();

254

let undecided_mut = self.undecided.as_mut().unwrap();

247

for missrev in range {

255

while let Some(rev) = tovisit.pop_front() {

248

self.missing.insert(~~miss~~rev);

256

if !self.missing.insert(rev) {

249

undecided_mut.remove(&missrev);

257

// either it's known to be missing from a previous

258

// invocation, and there's no need to iterate on its

259

// children (we now they are all missing)

260

// or it's from a previous iteration of this loop

261

// and its children have already been queued

262

continue;

263

}

264

undecided_mut.remove(&rev);

265

match children.get(&rev) {

266

None => {

267

continue;

268

}

269

Some(this_children) => {

270

for child in this_children.iter().cloned() {

271

if seen.insert(child) {

272

tovisit.push_back(child);

273

}

274

}

275

}

276

}

250

}

277

}

251

Ok(())

278

Ok(())

252

}

279

}

253

280

254

/// Do we have any information about the peer?

281

/// Do we have any information about the peer?

255

pub fn has_info(&self) -> bool {

282

pub fn has_info(&self) -> bool {

256

self.common.has_bases()

283

self.common.has_bases()

257

}

284

}

258

285

259

/// Did we acquire full knowledge of our Revisions that the peer has?

286

/// Did we acquire full knowledge of our Revisions that the peer has?

260

pub fn is_complete(&self) -> bool {

287

pub fn is_complete(&self) -> bool {

261

self.undecided.as_ref().map_or(false, |s| s.is_empty())

288

self.undecided.as_ref().map_or(false, |s| s.is_empty())

262

}

289

}

263

290

264

/// Return the heads of the currently known common set of revisions.

291

/// Return the heads of the currently known common set of revisions.

265

///

292

///

266

/// If the discovery process is not complete (see `is_complete()`), the

293

/// If the discovery process is not complete (see `is_complete()`), the

267

/// caller must be aware that this is an intermediate state.

294

/// caller must be aware that this is an intermediate state.

268

///

295

///

269

/// On the other hand, if it is complete, then this is currently

296

/// On the other hand, if it is complete, then this is currently

270

/// the only way to retrieve the end results of the discovery process.

297

/// the only way to retrieve the end results of the discovery process.

271

///

298

///

272

/// We may introduce in the future an `into_common_heads` call that

299

/// We may introduce in the future an `into_common_heads` call that

273

/// would be more appropriate for normal Rust callers, dropping `self`

300

/// would be more appropriate for normal Rust callers, dropping `self`

274

/// if it is complete.

301

/// if it is complete.

275

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

302

pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {

276

self.common.bases_heads()

303

self.common.bases_heads()

277

}

304

}

278

305

279

/// Force first computation of `self.undecided`

306

/// Force first computation of `self.undecided`

280

///

307

///

281

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

308

/// After this, `self.undecided.as_ref()` and `.as_mut()` can be

282

/// unwrapped to get workable immutable or mutable references without

309

/// unwrapped to get workable immutable or mutable references without

283

/// any panic.

310

/// any panic.

284

///

311

///

285

/// This is an imperative call instead of an access with added lazyness

312

/// This is an imperative call instead of an access with added lazyness

286

/// to reduce easily the scope of mutable borrow for the caller,

313

/// to reduce easily the scope of mutable borrow for the caller,

287

/// compared to undecided(&'a mut self) -> &'a… that would keep it

314

/// compared to undecided(&'a mut self) -> &'a… that would keep it

288

/// as long as the resulting immutable one.

315

/// as long as the resulting immutable one.

289

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

316

fn ensure_undecided(&mut self) -> Result<(), GraphError> {

290

if self.undecided.is_some() {

317

if self.undecided.is_some() {

291

return Ok(());

318

return Ok(());

292

}

319

}

293

let tgt = self.target_heads.take().unwrap();

320

let tgt = self.target_heads.take().unwrap();

294

self.undecided =

321

self.undecided =

295

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

322

Some(self.common.missing_ancestors(tgt)?.into_iter().collect());

296

Ok(())

323

Ok(())

297

}

324

}

298

325

299

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

326

fn ensure_children_cache(&mut self) -> Result<(), GraphError> {

300

if self.children_cache.is_some() {

327

if self.children_cache.is_some() {

301

return Ok(());

328

return Ok(());

302

}

329

}

303

self.ensure_undecided()?;

330

self.ensure_undecided()?;

304

331

305

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

332

let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();

306

for &rev in self.undecided.as_ref().unwrap() {

333

for &rev in self.undecided.as_ref().unwrap() {

307

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

334

for p in ParentsIterator::graph_parents(&self.graph, rev)? {

308

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

335

children.entry(p).or_insert_with(|| Vec::new()).push(rev);

309

}

336

}

310

}

337

}

311

self.children_cache = Some(children);

338

self.children_cache = Some(children);

312

Ok(())

339

Ok(())

313

}

340

}

314

341

315

/// Provide statistics about the current state of the discovery process

342

/// Provide statistics about the current state of the discovery process

316

pub fn stats(&self) -> DiscoveryStats {

343

pub fn stats(&self) -> DiscoveryStats {

317

DiscoveryStats {

344

DiscoveryStats {

318

undecided: self.undecided.as_ref().map(|s| s.len()),

345

undecided: self.undecided.as_ref().map(|s| s.len()),

319

}

346

}

320

}

347

}

321

348

322

pub fn take_quick_sample(

349

pub fn take_quick_sample(

323

&mut self,

350

&mut self,

324

headrevs: impl IntoIterator<Item = Revision>,

351

headrevs: impl IntoIterator<Item = Revision>,

325

size: usize,

352

size: usize,

326

) -> Result<Vec<Revision>, GraphError> {

353

) -> Result<Vec<Revision>, GraphError> {

327

self.ensure_undecided()?;

354

self.ensure_undecided()?;

328

let mut sample = {

355

let mut sample = {

329

let undecided = self.undecided.as_ref().unwrap();

356

let undecided = self.undecided.as_ref().unwrap();

330

if undecided.len() <= size {

357

if undecided.len() <= size {

331

return Ok(undecided.iter().cloned().collect());

358

return Ok(undecided.iter().cloned().collect());

332

}

359

}

333

dagops::heads(&self.graph, undecided.iter())?

360

dagops::heads(&self.graph, undecided.iter())?

334

};

361

};

335

if sample.len() >= size {

362

if sample.len() >= size {

336

return Ok(self.limit_sample(sample.into_iter().collect(), size));

363

return Ok(self.limit_sample(sample.into_iter().collect(), size));

337

}

364

}

338

update_sample(

365

update_sample(

339

None,

366

None,

340

headrevs,

367

headrevs,

341

&mut sample,

368

&mut sample,

342

|r| ParentsIterator::graph_parents(&self.graph, r),

369

|r| ParentsIterator::graph_parents(&self.graph, r),

343

Some(size),

370

Some(size),

344

)?;

371

)?;

345

Ok(sample.into_iter().collect())

372

Ok(sample.into_iter().collect())

346

}

373

}

347

374

348

/// Extract a sample from `self.undecided`, going from its heads and roots.

375

/// Extract a sample from `self.undecided`, going from its heads and roots.

349

///

376

///

350

/// The `size` parameter is used to avoid useless computations if

377

/// The `size` parameter is used to avoid useless computations if

351

/// it turns out to be bigger than the whole set of undecided Revisions.

378

/// it turns out to be bigger than the whole set of undecided Revisions.

352

///

379

///

353

/// The sample is taken by using `update_sample` from the heads, then

380

/// The sample is taken by using `update_sample` from the heads, then

354

/// from the roots, working on the reverse DAG,

381

/// from the roots, working on the reverse DAG,

355

/// expressed by `self.children_cache`.

382

/// expressed by `self.children_cache`.

356

///

383

///

357

/// No effort is being made to complete or limit the sample to `size`

384

/// No effort is being made to complete or limit the sample to `size`

358

/// but this method returns another interesting size that it derives

385

/// but this method returns another interesting size that it derives

359

/// from its knowledge of the structure of the various sets, leaving

386

/// from its knowledge of the structure of the various sets, leaving

360

/// to the caller the decision to use it or not.

387

/// to the caller the decision to use it or not.

361

fn bidirectional_sample(

388

fn bidirectional_sample(

362

&mut self,

389

&mut self,

363

size: usize,

390

size: usize,

364

) -> Result<(HashSet<Revision>, usize), GraphError> {

391

) -> Result<(HashSet<Revision>, usize), GraphError> {

365

self.ensure_undecided()?;

392

self.ensure_undecided()?;

366

{

393

{

367

// we don't want to compute children_cache before this

394

// we don't want to compute children_cache before this

368

// but doing it after extracting self.undecided takes a mutable

395

// but doing it after extracting self.undecided takes a mutable

369

// ref to self while a shareable one is still active.

396

// ref to self while a shareable one is still active.

370

let undecided = self.undecided.as_ref().unwrap();

397

let undecided = self.undecided.as_ref().unwrap();

371

if undecided.len() <= size {

398

if undecided.len() <= size {

372

return Ok((undecided.clone(), size));

399

return Ok((undecided.clone(), size));

373

}

400

}

374

}

401

}

375

402

376

self.ensure_children_cache()?;

403

self.ensure_children_cache()?;

377

let revs = self.undecided.as_ref().unwrap();

404

let revs = self.undecided.as_ref().unwrap();

378

let mut sample: HashSet<Revision> = revs.clone();

405

let mut sample: HashSet<Revision> = revs.clone();

379

406

380

// it's possible that leveraging the children cache would be more

407

// it's possible that leveraging the children cache would be more

381

// efficient here

408

// efficient here

382

dagops::retain_heads(&self.graph, &mut sample)?;

409

dagops::retain_heads(&self.graph, &mut sample)?;

383

let revsheads = sample.clone(); // was again heads(revs) in python

410

let revsheads = sample.clone(); // was again heads(revs) in python

384

411

385

// update from heads

412

// update from heads

386

update_sample(

413

update_sample(

387

Some(revs),

414

Some(revs),

388

revsheads.iter().cloned(),

415

revsheads.iter().cloned(),

389

&mut sample,

416

&mut sample,

390

|r| ParentsIterator::graph_parents(&self.graph, r),

417

|r| ParentsIterator::graph_parents(&self.graph, r),

391

None,

418

None,

392

)?;

419

)?;

393

420

394

// update from roots

421

// update from roots

395

let revroots: HashSet<Revision> =

422

let revroots: HashSet<Revision> =

396

dagops::roots(&self.graph, revs)?.into_iter().collect();

423

dagops::roots(&self.graph, revs)?.into_iter().collect();

397

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

424

let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

398

425

399

let children = self.children_cache.as_ref().unwrap();

426

let children = self.children_cache.as_ref().unwrap();

400

let empty_vec: Vec<Revision> = Vec::new();

427

let empty_vec: Vec<Revision> = Vec::new();

401

update_sample(

428

update_sample(

402

Some(revs),

429

Some(revs),

403

revroots,

430

revroots,

404

&mut sample,

431

&mut sample,

405

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

432

|r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),

406

None,

433

None,

407

)?;

434

)?;

408

Ok((sample, prescribed_size))

435

Ok((sample, prescribed_size))

409

}

436

}

410

437

411

/// Fill up sample up to the wished size with random undecided Revisions.

438

/// Fill up sample up to the wished size with random undecided Revisions.

412

///

439

///

413

/// This is intended to be used as a last resort completion if the

440

/// This is intended to be used as a last resort completion if the

414

/// regular sampling algorithm returns too few elements.

441

/// regular sampling algorithm returns too few elements.

415

fn random_complete_sample(

442

fn random_complete_sample(

416

&mut self,

443

&mut self,

417

sample: &mut Vec<Revision>,

444

sample: &mut Vec<Revision>,

418

size: usize,

445

size: usize,

419

) {

446

) {

420

let sample_len = sample.len();

447

let sample_len = sample.len();

421

if size <= sample_len {

448

if size <= sample_len {

422

return;

449

return;

423

}

450

}

424

let take_from: Vec<Revision> = self

451

let take_from: Vec<Revision> = self

425

.undecided

452

.undecided

426

.as_ref()

453

.as_ref()

427

.unwrap()

454

.unwrap()

428

.iter()

455

.iter()

429

.filter(|&r| !sample.contains(r))

456

.filter(|&r| !sample.contains(r))

430

.cloned()

457

.cloned()

431

.collect();

458

.collect();

432

sample.extend(self.limit_sample(take_from, size - sample_len));

459

sample.extend(self.limit_sample(take_from, size - sample_len));

433

}

460

}

434

461

435

pub fn take_full_sample(

462

pub fn take_full_sample(

436

&mut self,

463

&mut self,

437

size: usize,

464

size: usize,

438

) -> Result<Vec<Revision>, GraphError> {

465

) -> Result<Vec<Revision>, GraphError> {

439

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

466

let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;

440

let size = if self.respect_size {

467

let size = if self.respect_size {

441

size

468

size

442

} else {

469

} else {

443

prescribed_size

470

prescribed_size

444

};

471

};

445

let mut sample =

472

let mut sample =

446

self.limit_sample(sample_set.into_iter().collect(), size);

473

self.limit_sample(sample_set.into_iter().collect(), size);

447

self.random_complete_sample(&mut sample, size);

474

self.random_complete_sample(&mut sample, size);

448

Ok(sample)

475

Ok(sample)

449

}

476

}

450

}

477

}

451

478

452

#[cfg(test)]

479

#[cfg(test)]

453

mod tests {

480

mod tests {

454

use super::*;

481

use super::*;

455

use crate::testing::SampleGraph;

482

use crate::testing::SampleGraph;

456

483

457

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

484

/// A PartialDiscovery as for pushing all the heads of `SampleGraph`

458

///

485

///

459

/// To avoid actual randomness in these tests, we give it a fixed

486

/// To avoid actual randomness in these tests, we give it a fixed

460

/// random seed, but by default we'll test the random version.

487

/// random seed, but by default we'll test the random version.

461

fn full_disco() -> PartialDiscovery<SampleGraph> {

488

fn full_disco() -> PartialDiscovery<SampleGraph> {

462

PartialDiscovery::new_with_seed(

489

PartialDiscovery::new_with_seed(

463

SampleGraph,

490

SampleGraph,

464

vec![10, 11, 12, 13],

491

vec![10, 11, 12, 13],

465

[0; 16],

492

[0; 16],

466

true,

493

true,

467

true,

494

true,

468

)

495

)

469

}

496

}

470

497

471

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

498

/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`

472

///

499

///

473

/// To avoid actual randomness in tests, we give it a fixed random seed.

500

/// To avoid actual randomness in tests, we give it a fixed random seed.

474

fn disco12() -> PartialDiscovery<SampleGraph> {

501

fn disco12() -> PartialDiscovery<SampleGraph> {

475

PartialDiscovery::new_with_seed(

502

PartialDiscovery::new_with_seed(

476

SampleGraph,

503

SampleGraph,

477

vec![12],

504

vec![12],

478

[0; 16],

505

[0; 16],

479

true,

506

true,

480

true,

507

true,

481

)

508

)

482

}

509

}

483

510

484

fn sorted_undecided(

511

fn sorted_undecided(

485

disco: &PartialDiscovery<SampleGraph>,

512

disco: &PartialDiscovery<SampleGraph>,

486

) -> Vec<Revision> {

513

) -> Vec<Revision> {

487

let mut as_vec: Vec<Revision> =

514

let mut as_vec: Vec<Revision> =

488

disco.undecided.as_ref().unwrap().iter().cloned().collect();

515

disco.undecided.as_ref().unwrap().iter().cloned().collect();

489

as_vec.sort();

516

as_vec.sort();

490

as_vec

517

as_vec

491

}

518

}

492

519

493

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

520

fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {

494

let mut as_vec: Vec<Revision> =

521

let mut as_vec: Vec<Revision> =

495

disco.missing.iter().cloned().collect();

522

disco.missing.iter().cloned().collect();

496

as_vec.sort();

523

as_vec.sort();

497

as_vec

524

as_vec

498

}

525

}

499

526

500

fn sorted_common_heads(

527

fn sorted_common_heads(

501

disco: &PartialDiscovery<SampleGraph>,

528

disco: &PartialDiscovery<SampleGraph>,

502

) -> Result<Vec<Revision>, GraphError> {

529

) -> Result<Vec<Revision>, GraphError> {

503

let mut as_vec: Vec<Revision> =

530

let mut as_vec: Vec<Revision> =

504

disco.common_heads()?.iter().cloned().collect();

531

disco.common_heads()?.iter().cloned().collect();

505

as_vec.sort();

532

as_vec.sort();

506

Ok(as_vec)

533

Ok(as_vec)

507

}

534

}

508

535

509

#[test]

536

#[test]

510

fn test_add_common_get_undecided() -> Result<(), GraphError> {

537

fn test_add_common_get_undecided() -> Result<(), GraphError> {

511

let mut disco = full_disco();

538

let mut disco = full_disco();

512

assert_eq!(disco.undecided, None);

539

assert_eq!(disco.undecided, None);

513

assert!(!disco.has_info());

540

assert!(!disco.has_info());

514

assert_eq!(disco.stats().undecided, None);

541

assert_eq!(disco.stats().undecided, None);

515

542

516

disco.add_common_revisions(vec![11, 12])?;

543

disco.add_common_revisions(vec![11, 12])?;

517

assert!(disco.has_info());

544

assert!(disco.has_info());

518

assert!(!disco.is_complete());

545

assert!(!disco.is_complete());

519

assert!(disco.missing.is_empty());

546

assert!(disco.missing.is_empty());

520

547

521

// add_common_revisions did not trigger a premature computation

548

// add_common_revisions did not trigger a premature computation

522

// of `undecided`, let's check that and ask for them

549

// of `undecided`, let's check that and ask for them

523

assert_eq!(disco.undecided, None);

550

assert_eq!(disco.undecided, None);

524

disco.ensure_undecided()?;

551

disco.ensure_undecided()?;

525

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

552

assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);

526

assert_eq!(disco.stats().undecided, Some(4));

553

assert_eq!(disco.stats().undecided, Some(4));

527

Ok(())

554

Ok(())

528

}

555

}

529

556

530

/// in this test, we pretend that our peer misses exactly (8+10)::

557

/// in this test, we pretend that our peer misses exactly (8+10)::

531

/// and we're comparing all our repo to it (as in a bare push)

558

/// and we're comparing all our repo to it (as in a bare push)

532

#[test]

559

#[test]

533

fn test_discovery() -> Result<(), GraphError> {

560

fn test_discovery() -> Result<(), GraphError> {

534

let mut disco = full_disco();

561

let mut disco = full_disco();

535

disco.add_common_revisions(vec![11, 12])?;

562

disco.add_common_revisions(vec![11, 12])?;

536

disco.add_missing_revisions(vec![8, 10])?;

563

disco.add_missing_revisions(vec![8, 10])?;

537

assert_eq!(sorted_undecided(&disco), vec![5]);

564

assert_eq!(sorted_undecided(&disco), vec![5]);

538

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

565

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

539

assert!(!disco.is_complete());

566

assert!(!disco.is_complete());

540

567

541

disco.add_common_revisions(vec![5])?;

568

disco.add_common_revisions(vec![5])?;

542

assert_eq!(sorted_undecided(&disco), vec![]);

569

assert_eq!(sorted_undecided(&disco), vec![]);

543

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

570

assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);

544

assert!(disco.is_complete());

571

assert!(disco.is_complete());

545

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

572

assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);

546

Ok(())

573

Ok(())

547

}

574

}

548

575

549

#[test]

576

#[test]

577

fn test_add_missing_early_continue() -> Result<(), GraphError> {

578

eprintln!("test_add_missing_early_stop");

579

let mut disco = full_disco();

580

disco.add_common_revisions(vec![13, 3, 4])?;

581

disco.ensure_children_cache()?;

582

// 12 is grand-child of 6 through 9

583

// passing them in this order maximizes the chances of the

584

// early continue to do the wrong thing

585

disco.add_missing_revisions(vec![6, 9, 12])?;

586

assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);

587

assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);

588

assert!(!disco.is_complete());

589

Ok(())

590

}

591

592

#[test]

550

fn test_limit_sample_no_need_to() {

593

fn test_limit_sample_no_need_to() {

551

let sample = vec![1, 2, 3, 4];

594

let sample = vec![1, 2, 3, 4];

552

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

595

assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);

553

}

596

}

554

597

555

#[test]

598

#[test]

556

fn test_limit_sample_less_than_half() {

599

fn test_limit_sample_less_than_half() {

557

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

600

assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);

558

}

601

}

559

602

560

#[test]

603

#[test]

561

fn test_limit_sample_more_than_half() {

604

fn test_limit_sample_more_than_half() {

562

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

605

assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);

563

}

606

}

564

607

565

#[test]

608

#[test]

566

fn test_limit_sample_no_random() {

609

fn test_limit_sample_no_random() {

567

let mut disco = full_disco();

610

let mut disco = full_disco();

568

disco.randomize = false;

611

disco.randomize = false;

569

assert_eq!(

612

assert_eq!(

570

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

613

disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),

571

vec![1, 3, 5, 7]

614

vec![1, 3, 5, 7]

572

);

615

);

573

}

616

}

574

617

575

#[test]

618

#[test]

576

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

619

fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {

577

let mut disco = full_disco();

620

let mut disco = full_disco();

578

disco.undecided = Some((1..=13).collect());

621

disco.undecided = Some((1..=13).collect());

579

622

580

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

623

let mut sample_vec = disco.take_quick_sample(vec![], 4)?;

581

sample_vec.sort();

624

sample_vec.sort();

582

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

625

assert_eq!(sample_vec, vec![10, 11, 12, 13]);

583

Ok(())

626

Ok(())

584

}

627

}

585

628

586

#[test]

629

#[test]

587

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

630

fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {

588

let mut disco = disco12();

631

let mut disco = disco12();

589

disco.ensure_undecided()?;

632

disco.ensure_undecided()?;

590

633

591

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

634

let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;

592

sample_vec.sort();

635

sample_vec.sort();

593

// r12's only parent is r9, whose unique grand-parent through the

636

// r12's only parent is r9, whose unique grand-parent through the

594

// diamond shape is r4. This ends there because the distance from r4

637

// diamond shape is r4. This ends there because the distance from r4

595

// to the root is only 3.

638

// to the root is only 3.

596

assert_eq!(sample_vec, vec![4, 9, 12]);

639

assert_eq!(sample_vec, vec![4, 9, 12]);

597

Ok(())

640

Ok(())

598

}

641

}

599

642

600

#[test]

643

#[test]

601

fn test_children_cache() -> Result<(), GraphError> {

644

fn test_children_cache() -> Result<(), GraphError> {

602

let mut disco = full_disco();

645

let mut disco = full_disco();

603

disco.ensure_children_cache()?;

646

disco.ensure_children_cache()?;

604

647

605

let cache = disco.children_cache.unwrap();

648

let cache = disco.children_cache.unwrap();

606

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

649

assert_eq!(cache.get(&2).cloned(), Some(vec![4]));

607

assert_eq!(cache.get(&10).cloned(), None);

650

assert_eq!(cache.get(&10).cloned(), None);

608

651

609

let mut children_4 = cache.get(&4).cloned().unwrap();

652

let mut children_4 = cache.get(&4).cloned().unwrap();

610

children_4.sort();

653

children_4.sort();

611

assert_eq!(children_4, vec![5, 6, 7]);

654

assert_eq!(children_4, vec![5, 6, 7]);

612

655

613

let mut children_7 = cache.get(&7).cloned().unwrap();

656

let mut children_7 = cache.get(&7).cloned().unwrap();

614

children_7.sort();

657

children_7.sort();

615

assert_eq!(children_7, vec![9, 11]);

658

assert_eq!(children_7, vec![9, 11]);

616

659

617

Ok(())

660

Ok(())

618

}

661

}

619

662

620

#[test]

663

#[test]

621

fn test_complete_sample() {

664

fn test_complete_sample() {

622

let mut disco = full_disco();

665

let mut disco = full_disco();

623

let undecided: HashSet<Revision> =

666

let undecided: HashSet<Revision> =

624

[4, 7, 9, 2, 3].iter().cloned().collect();

667

[4, 7, 9, 2, 3].iter().cloned().collect();

625

disco.undecided = Some(undecided);

668

disco.undecided = Some(undecided);

626

669

627

let mut sample = vec![0];

670

let mut sample = vec![0];

628

disco.random_complete_sample(&mut sample, 3);

671

disco.random_complete_sample(&mut sample, 3);

629

assert_eq!(sample.len(), 3);

672

assert_eq!(sample.len(), 3);

630

673

631

let mut sample = vec![2, 4, 7];

674

let mut sample = vec![2, 4, 7];

632

disco.random_complete_sample(&mut sample, 1);

675

disco.random_complete_sample(&mut sample, 1);

633

assert_eq!(sample.len(), 3);

676

assert_eq!(sample.len(), 3);

634

}

677

}

635

678

636

#[test]

679

#[test]

637

fn test_bidirectional_sample() -> Result<(), GraphError> {

680

fn test_bidirectional_sample() -> Result<(), GraphError> {

638

let mut disco = full_disco();

681

let mut disco = full_disco();

639

disco.undecided = Some((0..=13).into_iter().collect());

682

disco.undecided = Some((0..=13).into_iter().collect());

640

683

641

let (sample_set, size) = disco.bidirectional_sample(7)?;

684

let (sample_set, size) = disco.bidirectional_sample(7)?;

642

assert_eq!(size, 7);

685

assert_eq!(size, 7);

643

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

686

let mut sample: Vec<Revision> = sample_set.into_iter().collect();

644

sample.sort();

687

sample.sort();

645

// our DAG is a bit too small for the results to be really interesting

688

// our DAG is a bit too small for the results to be really interesting

646

// at least it shows that

689

// at least it shows that

647

// - we went both ways

690

// - we went both ways

648

// - we didn't take all Revisions (6 is not in the sample)

691

// - we didn't take all Revisions (6 is not in the sample)

649

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

692

assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);

650

Ok(())

693

Ok(())

651

}

694

}

652

695

653

}

696

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             // discovery.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Discovery operations
             //!
             //! This is a Rust counterpart to the `partialdiscovery` class of
             //! `mercurial.setdiscovery`
             extern crate rand;
             extern crate rand_pcg;
             use self::rand::seq::SliceRandom;
             use self::rand::{thread_rng, RngCore, SeedableRng};
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::ancestors::MissingAncestors;
             use crate::dagops;
             use std::cmp::{max, min};
             use std::collections::{HashMap, HashSet, VecDeque};
             type Rng = self::rand_pcg::Pcg32;
             pub struct PartialDiscovery<G: Graph + Clone> {
                 target_heads: Option<Vec<Revision>>,
                 graph: G, // plays the role of self._repo
                 common: MissingAncestors<G>,
                 undecided: Option<HashSet<Revision>>,
                 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
                 missing: HashSet<Revision>,
                 rng: Rng,
                 respect_size: bool,
                 randomize: bool,
             }
             pub struct DiscoveryStats {
                 pub undecided: Option<usize>,
             }
             /// Update an existing sample to match the expected size
             ///
             /// The sample is updated with revisions exponentially distant from each
             /// element of `heads`.
             ///
             /// If a target size is specified, the sampling will stop once this size is
             /// reached. Otherwise sampling will happen until roots of the <revs> set are
             /// reached.
             ///
             /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
             ///   represented by `parentfn`
             /// - `heads`: set of DAG head revs
             /// - `sample`: a sample to update
             /// - `parentfn`: a callable to resolve parents for a revision
             /// - `quicksamplesize`: optional target size of the sample
             fn update_sample<I>(
                 revs: Option<&HashSet<Revision>>,
                 heads: impl IntoIterator<Item = Revision>,
                 sample: &mut HashSet<Revision>,
                 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
                 quicksamplesize: Option<usize>,
             ) -> Result<(), GraphError>
             where
                 I: Iterator<Item = Revision>,
             {
                 let mut distances: HashMap<Revision, u32> = HashMap::new();
                 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
                 let mut factor: u32 = 1;
                 let mut seen: HashSet<Revision> = HashSet::new();
                 loop {
                     let current = match visit.pop_front() {
                         None => {
                             break;
                         }
                         Some(r) => r,
                     };
                     if !seen.insert(current) {
                         continue;
                     }
                     let d = *distances.entry(current).or_insert(1);
                     if d > factor {
                         factor *= 2;
                     }
                     if d == factor {
                         sample.insert(current);
                         if let Some(sz) = quicksamplesize {
                             if sample.len() >= sz {
                                 return Ok(());
                             }
                         }
                     }
                     for p in parentsfn(current)? {
                         if let Some(revs) = revs {
                             if !revs.contains(&p) {
                                 continue;
                             }
                         }
                         distances.entry(p).or_insert(d + 1);
                         visit.push_back(p);
                     }
                 }
                 Ok(())
             }
             struct ParentsIterator {
                 parents: [Revision; 2],
                 cur: usize,
             }
             impl ParentsIterator {
                 fn graph_parents(
                     graph: &impl Graph,
                     r: Revision,
                 ) -> Result<ParentsIterator, GraphError> {
                     Ok(ParentsIterator {
                         parents: graph.parents(r)?,
                         cur: 0,
                     })
                 }
             }
             impl Iterator for ParentsIterator {
                 type Item = Revision;
                 fn next(&mut self) -> Option<Revision> {
                     if self.cur > 1 {
                         return None;
                     }
                     let rev = self.parents[self.cur];
                     self.cur += 1;
                     if rev == NULL_REVISION {
                         return self.next();
                     }
                     Some(rev)
                 }
             }
             impl<G: Graph + Clone> PartialDiscovery<G> {
                 /// Create a PartialDiscovery object, with the intent
                 /// of comparing our `::<target_heads>` revset to the contents of another
                 /// repo.
                 ///
                 /// For now `target_heads` is passed as a vector, and will be used
                 /// at the first call to `ensure_undecided()`.
                 ///
                 /// If we want to make the signature more flexible,
                 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
                 /// object since we'll keep it in the meanwhile
                 ///
                 /// The `respect_size` boolean controls how the sampling methods
                 /// will interpret the size argument requested by the caller. If it's
                 /// `false`, they are allowed to produce a sample whose size is more
                 /// appropriate to the situation (typically bigger).
                 ///
                 /// The `randomize` boolean affects sampling, and specifically how
                 /// limiting or last-minute expanding is been done:
                 ///
                 /// If `true`, both will perform random picking from `self.undecided`.
                 /// This is currently the best for actual discoveries.
                 ///
                 /// If `false`, a reproductible picking strategy is performed. This is
                 /// useful for integration tests.
                 pub fn new(
                     graph: G,
                     target_heads: Vec<Revision>,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     let mut seed: [u8; 16] = [0; 16];
                     if randomize {
                         thread_rng().fill_bytes(&mut seed);
                     }
                     Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
                 }
                 pub fn new_with_seed(
                     graph: G,
                     target_heads: Vec<Revision>,
                     seed: [u8; 16],
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     PartialDiscovery {
                         undecided: None,
                         children_cache: None,
                         target_heads: Some(target_heads),
                         graph: graph.clone(),
                         common: MissingAncestors::new(graph, vec![]),
                         missing: HashSet::new(),
                         rng: Rng::from_seed(seed),
                         respect_size: respect_size,
                         randomize: randomize,
                     }
                 }
                 /// Extract at most `size` random elements from sample and return them
                 /// as a vector
                 fn limit_sample(
                     &mut self,
                     mut sample: Vec<Revision>,
                     size: usize,
                 ) -> Vec<Revision> {
                     if !self.randomize {
                         sample.sort();
                         sample.truncate(size);
                         return sample;
                     }
                     let sample_len = sample.len();
                     if sample_len <= size {
                         return sample;
                     }
                     let rng = &mut self.rng;
                     let dropped_size = sample_len - size;
                     let limited_slice = if size < dropped_size {
                         sample.partial_shuffle(rng, size).0
                     } else {
                         sample.partial_shuffle(rng, dropped_size).1
                     };
                     limited_slice.to_owned()
                 }
                 /// Register revisions known as being common
                 pub fn add_common_revisions(
                     &mut self,
                     common: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     self.common.add_bases(common);
                     if let Some(ref mut undecided) = self.undecided {
                         self.common.remove_ancestors_from(undecided)?;
                     }
                     Ok(())
                 }
                 /// Register revisions known as being missing
+                ///
+                /// # Performance note
+                ///
+                /// Except in the most trivial case, the first call of this method has
+                /// the side effect of computing `self.undecided` set for the first time,
+                /// and the related caches it might need for efficiency of its internal
+                /// computation. This is typically faster if more information is
+                /// available in `self.common`. Therefore, for good performance, the
+                /// caller should avoid calling this too early.
                 pub fn add_missing_revisions(
                     &mut self,
                     missing: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
-                    self.ensure_undecided()?;
+                    self.ensure_children_cache()?;
-                    let range = dagops::range(
+                    self.ensure_undecided()?; // for safety of possible future refactors
-                        &self.graph,
+                    let children = self.children_cache.as_ref().unwrap();
-                        missing,
+                    let mut seen: HashSet<Revision> = HashSet::new();
-                        self.undecided.as_ref().unwrap().iter().cloned(),
+                    let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
-                    )?;
                     let undecided_mut = self.undecided.as_mut().unwrap();
-                    for missrev in range {
+                    while let Some(rev) = tovisit.pop_front() {
-                        self.missing.insert(missrev);
+                        if !self.missing.insert(rev) {
-                        undecided_mut.remove(&missrev);
+                            // either it's known to be missing from a previous
+                            // invocation, and there's no need to iterate on its
+                            // children (we now they are all missing)
+                            // or it's from a previous iteration of this loop
+                            // and its children have already been queued
+                            continue;
+                        }
+                        undecided_mut.remove(&rev);
+                        match children.get(&rev) {
+                            None => {
+                                continue;
+                            }
+                            Some(this_children) => {
+                                for child in this_children.iter().cloned() {
+                                    if seen.insert(child) {
+                                        tovisit.push_back(child);
+                                    }
+                                }
+                            }
+                        }
                     }
                     Ok(())
                 }
                 /// Do we have any information about the peer?
                 pub fn has_info(&self) -> bool {
                     self.common.has_bases()
                 }
                 /// Did we acquire full knowledge of our Revisions that the peer has?
                 pub fn is_complete(&self) -> bool {
                     self.undecided.as_ref().map_or(false, |s| s.is_empty())
                 }
                 /// Return the heads of the currently known common set of revisions.
                 ///
                 /// If the discovery process is not complete (see `is_complete()`), the
                 /// caller must be aware that this is an intermediate state.
                 ///
                 /// On the other hand, if it is complete, then this is currently
                 /// the only way to retrieve the end results of the discovery process.
                 ///
                 /// We may introduce in the future an `into_common_heads` call that
                 /// would be more appropriate for normal Rust callers, dropping `self`
                 /// if it is complete.
                 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     self.common.bases_heads()
                 }
                 /// Force first computation of `self.undecided`
                 ///
                 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
                 /// unwrapped to get workable immutable or mutable references without
                 /// any panic.
                 ///
                 /// This is an imperative call instead of an access with added lazyness
                 /// to reduce easily the scope of mutable borrow for the caller,
                 /// compared to undecided(&'a mut self) -> &'a… that would keep it
                 /// as long as the resulting immutable one.
                 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
                     if self.undecided.is_some() {
                         return Ok(());
                     }
                     let tgt = self.target_heads.take().unwrap();
                     self.undecided =
                         Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
                     Ok(())
                 }
                 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
                     if self.children_cache.is_some() {
                         return Ok(());
                     }
                     self.ensure_undecided()?;
                     let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
                     for &rev in self.undecided.as_ref().unwrap() {
                         for p in ParentsIterator::graph_parents(&self.graph, rev)? {
                             children.entry(p).or_insert_with(|| Vec::new()).push(rev);
                         }
                     }
                     self.children_cache = Some(children);
                     Ok(())
                 }
                 /// Provide statistics about the current state of the discovery process
                 pub fn stats(&self) -> DiscoveryStats {
                     DiscoveryStats {
                         undecided: self.undecided.as_ref().map(|s| s.len()),
                     }
                 }
                 pub fn take_quick_sample(
                     &mut self,
                     headrevs: impl IntoIterator<Item = Revision>,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     self.ensure_undecided()?;
                     let mut sample = {
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok(undecided.iter().cloned().collect());
                         }
                         dagops::heads(&self.graph, undecided.iter())?
                     };
                     if sample.len() >= size {
                         return Ok(self.limit_sample(sample.into_iter().collect(), size));
                     }
                     update_sample(
                         None,
                         headrevs,
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         Some(size),
                     )?;
                     Ok(sample.into_iter().collect())
                 }
                 /// Extract a sample from `self.undecided`, going from its heads and roots.
                 ///
                 /// The `size` parameter is used to avoid useless computations if
                 /// it turns out to be bigger than the whole set of undecided Revisions.
                 ///
                 /// The sample is taken by using `update_sample` from the heads, then
                 /// from the roots, working on the reverse DAG,
                 /// expressed by `self.children_cache`.
                 ///
                 /// No effort is being made to complete or limit the sample to `size`
                 /// but this method returns another interesting size that it derives
                 /// from its knowledge of the structure of the various sets, leaving
                 /// to the caller the decision to use it or not.
                 fn bidirectional_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<(HashSet<Revision>, usize), GraphError> {
                     self.ensure_undecided()?;
                     {
                         // we don't want to compute children_cache before this
                         // but doing it after extracting self.undecided takes a mutable
                         // ref to self while a shareable one is still active.
                         let undecided = self.undecided.as_ref().unwrap();
                         if undecided.len() <= size {
                             return Ok((undecided.clone(), size));
                         }
                     }
                     self.ensure_children_cache()?;
                     let revs = self.undecided.as_ref().unwrap();
                     let mut sample: HashSet<Revision> = revs.clone();
                     // it's possible that leveraging the children cache would be more
                     // efficient here
                     dagops::retain_heads(&self.graph, &mut sample)?;
                     let revsheads = sample.clone(); // was again heads(revs) in python
                     // update from heads
                     update_sample(
                         Some(revs),
                         revsheads.iter().cloned(),
                         &mut sample,
                         |r| ParentsIterator::graph_parents(&self.graph, r),
                         None,
                     )?;
                     // update from roots
                     let revroots: HashSet<Revision> =
                         dagops::roots(&self.graph, revs)?.into_iter().collect();
                     let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
                     let children = self.children_cache.as_ref().unwrap();
                     let empty_vec: Vec<Revision> = Vec::new();
                     update_sample(
                         Some(revs),
                         revroots,
                         &mut sample,
                         |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
                         None,
                     )?;
                     Ok((sample, prescribed_size))
                 }
                 /// Fill up sample up to the wished size with random undecided Revisions.
                 ///
                 /// This is intended to be used as a last resort completion if the
                 /// regular sampling algorithm returns too few elements.
                 fn random_complete_sample(
                     &mut self,
                     sample: &mut Vec<Revision>,
                     size: usize,
                 ) {
                     let sample_len = sample.len();
                     if size <= sample_len {
                         return;
                     }
                     let take_from: Vec<Revision> = self
                         .undecided
                         .as_ref()
                         .unwrap()
                         .iter()
                         .filter(|&r| !sample.contains(r))
                         .cloned()
                         .collect();
                     sample.extend(self.limit_sample(take_from, size - sample_len));
                 }
                 pub fn take_full_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
                     let size = if self.respect_size {
                         size
                     } else {
                         prescribed_size
                     };
                     let mut sample =
                         self.limit_sample(sample_set.into_iter().collect(), size);
                     self.random_complete_sample(&mut sample, size);
                     Ok(sample)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;
                 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
                 ///
                 /// To avoid actual randomness in these tests, we give it a fixed
                 /// random seed, but by default we'll test the random version.
                 fn full_disco() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![10, 11, 12, 13],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
                 ///
                 /// To avoid actual randomness in tests, we give it a fixed random seed.
                 fn disco12() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![12],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 fn sorted_undecided(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.undecided.as_ref().unwrap().iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.missing.iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 fn sorted_common_heads(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut as_vec: Vec<Revision> =
                         disco.common_heads()?.iter().cloned().collect();
                     as_vec.sort();
                     Ok(as_vec)
                 }
                 #[test]
                 fn test_add_common_get_undecided() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     assert_eq!(disco.undecided, None);
                     assert!(!disco.has_info());
                     assert_eq!(disco.stats().undecided, None);
                     disco.add_common_revisions(vec![11, 12])?;
                     assert!(disco.has_info());
                     assert!(!disco.is_complete());
                     assert!(disco.missing.is_empty());
                     // add_common_revisions did not trigger a premature computation
                     // of `undecided`, let's check that and ask for them
                     assert_eq!(disco.undecided, None);
                     disco.ensure_undecided()?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
                     assert_eq!(disco.stats().undecided, Some(4));
                     Ok(())
                 }
                 /// in this test, we pretend that our peer misses exactly (8+10)::
                 /// and we're comparing all our repo to it (as in a bare push)
                 #[test]
                 fn test_discovery() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![11, 12])?;
                     disco.add_missing_revisions(vec![8, 10])?;
                     assert_eq!(sorted_undecided(&disco), vec![5]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(!disco.is_complete());
                     disco.add_common_revisions(vec![5])?;
                     assert_eq!(sorted_undecided(&disco), vec![]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(disco.is_complete());
                     assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
                     Ok(())
                 }
                 #[test]
+                fn test_add_missing_early_continue() -> Result<(), GraphError> {
+                    eprintln!("test_add_missing_early_stop");
+                    let mut disco = full_disco();
+                    disco.add_common_revisions(vec![13, 3, 4])?;
+                    disco.ensure_children_cache()?;
+                    // 12 is grand-child of 6 through 9
+                    // passing them in this order maximizes the chances of the
+                    // early continue to do the wrong thing
+                    disco.add_missing_revisions(vec![6, 9, 12])?;
+                    assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
+                    assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
+                    assert!(!disco.is_complete());
+                    Ok(())
+                }
+                #[test]
                 fn test_limit_sample_no_need_to() {
                     let sample = vec![1, 2, 3, 4];
                     assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
                 }
                 #[test]
                 fn test_limit_sample_less_than_half() {
                     assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
                 }
                 #[test]
                 fn test_limit_sample_more_than_half() {
                     assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
                 }
                 #[test]
                 fn test_limit_sample_no_random() {
                     let mut disco = full_disco();
                     disco.randomize = false;
                     assert_eq!(
                         disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
                         vec![1, 3, 5, 7]
                     );
                 }
                 #[test]
                 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((1..=13).collect());
                     let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
                     sample_vec.sort();
                     assert_eq!(sample_vec, vec![10, 11, 12, 13]);
                     Ok(())
                 }
                 #[test]
                 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
                     let mut disco = disco12();
                     disco.ensure_undecided()?;
                     let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
                     sample_vec.sort();
                     // r12's only parent is r9, whose unique grand-parent through the
                     // diamond shape is r4. This ends there because the distance from r4
                     // to the root is only 3.
                     assert_eq!(sample_vec, vec![4, 9, 12]);
                     Ok(())
                 }
                 #[test]
                 fn test_children_cache() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.ensure_children_cache()?;
                     let cache = disco.children_cache.unwrap();
                     assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
                     assert_eq!(cache.get(&10).cloned(), None);
                     let mut children_4 = cache.get(&4).cloned().unwrap();
                     children_4.sort();
                     assert_eq!(children_4, vec![5, 6, 7]);
                     let mut children_7 = cache.get(&7).cloned().unwrap();
                     children_7.sort();
                     assert_eq!(children_7, vec![9, 11]);
                     Ok(())
                 }
                 #[test]
                 fn test_complete_sample() {
                     let mut disco = full_disco();
                     let undecided: HashSet<Revision> =
                         [4, 7, 9, 2, 3].iter().cloned().collect();
                     disco.undecided = Some(undecided);
                     let mut sample = vec![0];
                     disco.random_complete_sample(&mut sample, 3);
                     assert_eq!(sample.len(), 3);
                     let mut sample = vec![2, 4, 7];
                     disco.random_complete_sample(&mut sample, 1);
                     assert_eq!(sample.len(), 3);
                 }
                 #[test]
                 fn test_bidirectional_sample() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((0..=13).into_iter().collect());
                     let (sample_set, size) = disco.bidirectional_sample(7)?;
                     assert_eq!(size, 7);
                     let mut sample: Vec<Revision> = sample_set.into_iter().collect();
                     sample.sort();
                     // our DAG is a bit too small for the results to be really interesting
                     // at least it shows that
                     // - we went both ways
                     // - we didn't take all Revisions (6 is not in the sample)
                     assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
                     Ok(())
                 }
             }