upstream/mercurial-mirror Commit - r51869:9929c8a7

1

2

// and Mercurial contributors

2

// and Mercurial contributors

3

//

3

//

4

// This software may be used and distributed according to the terms of the

4

// This software may be used and distributed according to the terms of the

5

// GNU General Public License version 2 or any later version.

5

// GNU General Public License version 2 or any later version.

6

//! Mercurial concepts for handling revision history

6

//! Mercurial concepts for handling revision history

7

8

pub mod node;

8

pub mod node;

9

pub mod nodemap;

9

pub mod nodemap;

10

mod nodemap_docket;

10

mod nodemap_docket;

11

pub mod path_encode;

11

pub mod path_encode;

12

pub use node::{FromHexError, Node, NodePrefix};

12

pub use node::{FromHexError, Node, NodePrefix};

13

pub mod changelog;

13

pub mod changelog;

14

pub mod filelog;

14

pub mod filelog;

15

pub mod index;

15

pub mod index;

16

pub mod manifest;

16

pub mod manifest;

17

pub mod patch;

17

pub mod patch;

18

19

use std::borrow::Cow;

19

use std::borrow::Cow;

20

use std::io::Read;

20

use std::io::Read;

21

use std::ops::Deref;

21

use std::ops::Deref;

22

use std::path::Path;

22

use std::path::Path;

23

24

use flate2::read::ZlibDecoder;

24

use flate2::read::ZlibDecoder;

25

use sha1::{Digest, Sha1};

25

use sha1::{Digest, Sha1};

26

use std::cell::RefCell;

26

use std::cell::RefCell;

27

use zstd;

27

use zstd;

28

29

use self::node::{NODE_BYTES_LENGTH, NULL_NODE};

29

use self::node::{NODE_BYTES_LENGTH, NULL_NODE};

30

use self::nodemap_docket::NodeMapDocket;

30

use self::nodemap_docket::NodeMapDocket;

31

use super::index::Index;

31

use super::index::Index;

32

use super::nodemap::{NodeMap, NodeMapError};

32

use super::nodemap::{NodeMap, NodeMapError};

33

use crate::errors::HgError;

33

use crate::errors::HgError;

34

use crate::vfs::Vfs;

34

use crate::vfs::Vfs;

35

36

/// Mercurial revision numbers
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;

/// Unchecked Mercurial revision numbers.
///
/// Values of this type have no guarantee of being a valid revision number
/// in any context. Use method `check_revision` to get a valid revision within
/// the appropriate index object.
///
/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type UncheckedRevision = i32;

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = -1;

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;

/// Hexadecimal spelling (40 `f` digits) associated with the working
/// directory pseudo-revision.
pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";

67

68

/// The simplest expression of what we need of Mercurial DAGs.

68

/// The simplest expression of what we need of Mercurial DAGs.

69

pub trait Graph {

69

pub trait Graph {

70

/// Return the two parents of the given `Revision`.

70

/// Return the two parents of the given `Revision`.

71

///

71

///

72

/// Each of the parents can be independently `NULL_REVISION`

72

/// Each of the parents can be independently `NULL_REVISION`

73

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;

73

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;

74

}

74

}

75

76

#[derive(Clone, Debug, PartialEq)]

76

#[derive(Clone, Debug, PartialEq)]

77

pub enum GraphError {

77

pub enum GraphError {

78

ParentOutOfRange(Revision),

78

ParentOutOfRange(Revision),

79

}

79

}

80

81

/// The Mercurial Revlog Index

81

/// The Mercurial Revlog Index

82

///

82

///

83

/// This is currently limited to the minimal interface that is needed for

83

/// This is currently limited to the minimal interface that is needed for

84

/// the [`nodemap`](nodemap/index.html) module

84

/// the [`nodemap`](nodemap/index.html) module

85

pub trait RevlogIndex {

85

pub trait RevlogIndex {

86

/// Total number of Revisions referenced in this index

86

/// Total number of Revisions referenced in this index

87

fn len(&self) -> usize;

87

fn len(&self) -> usize;

88

89

fn is_empty(&self) -> bool {

89

fn is_empty(&self) -> bool {

90

self.len() == 0

90

self.len() == 0

91

}

91

}

92

93

/// Return a reference to the Node or `None` if rev is out of bounds

93

/// Return a reference to the Node or `None` if rev is out of bounds

94

///

94

///

95

/// `NULL_REVISION` is not considered to be out of bounds.

95

/// `NULL_REVISION` is not considered to be out of bounds.

96

fn node(&self, rev: Revision) -> Option<&Node>;

96

fn node(&self, rev: Revision) -> Option<&Node>;

97

98

/// Return a [`Revision`] if `rev` is a valid revision number for this

98

/// Return a [`Revision`] if `rev` is a valid revision number for this

99

/// index

99

/// index

100

fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {

100

fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {

101

if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {

101

if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {

102

Some(rev)

102

Some(rev)

103

} else {

103

} else {

104

None

104

None

105

}

105

}

106

}

106

}

107

}

107

}

108

109

// Bit masks tested against the 16-bit `flags` field of a `RevlogEntry`.

/// Flag bit checked by `RevlogEntry::is_censored`.
const REVISION_FLAG_CENSORED: u16 = 1 << 15;
/// Flag bit excluded from the length-affecting flags in
/// `RevlogEntry::has_length_affecting_flag_processor`.
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

/// Flags given to the entry built for `NULL_REVISION`: no bit set.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;

122

123

#[derive(Debug, derive_more::From)]

123

#[derive(Debug, derive_more::From)]

124

pub enum RevlogError {

124

pub enum RevlogError {

125

InvalidRevision,

125

InvalidRevision,

126

/// Working directory is not supported

126

/// Working directory is not supported

127

WDirUnsupported,

127

WDirUnsupported,

128

/// Found more than one entry whose ID match the requested prefix

128

/// Found more than one entry whose ID match the requested prefix

129

AmbiguousPrefix,

129

AmbiguousPrefix,

130

#[from]

130

#[from]

131

Other(HgError),

131

Other(HgError),

132

}

132

}

133

134

impl From<NodeMapError> for RevlogError {

134

impl From<NodeMapError> for RevlogError {

135

fn from(error: NodeMapError) -> Self {

135

fn from(error: NodeMapError) -> Self {

136

match error {

136

match error {

137

NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,

137

NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,

138

NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(

138

NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(

139

format!("nodemap point to revision {} not in index", rev),

139

format!("nodemap point to revision {} not in index", rev),

140

),

140

),

141

}

141

}

142

}

142

}

143

}

143

}

144

145

fn corrupted<S: AsRef<str>>(context: S) -> HgError {

145

fn corrupted<S: AsRef<str>>(context: S) -> HgError {

146

HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))

146

HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))

147

}

147

}

148

149

impl RevlogError {

149

impl RevlogError {

150

fn corrupted<S: AsRef<str>>(context: S) -> Self {

150

fn corrupted<S: AsRef<str>>(context: S) -> Self {

151

RevlogError::Other(corrupted(context))

151

RevlogError::Other(corrupted(context))

152

}

152

}

153

}

153

}

154

155

/// Read only implementation of revlog.

155

/// Read only implementation of revlog.

156

pub struct Revlog {

156

pub struct Revlog {

157

/// When index and data are not interleaved: bytes of the revlog index.

157

/// When index and data are not interleaved: bytes of the revlog index.

158

/// When index and data are interleaved: bytes of the revlog index and

158

/// When index and data are interleaved: bytes of the revlog index and

159

/// data.

159

/// data.

160

index: Index,

160

index: Index,

161

/// When index and data are not interleaved: bytes of the revlog data

161

/// When index and data are not interleaved: bytes of the revlog data

162

data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,

162

data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,

163

/// When present on disk: the persistent nodemap for this revlog

163

/// When present on disk: the persistent nodemap for this revlog

164

nodemap: Option<nodemap::NodeTree>,

164

nodemap: Option<nodemap::NodeTree>,

165

}

165

}

166

167

impl Revlog {

167

impl Revlog {

168

/// Open a revlog index file.

168

/// Open a revlog index file.

169

///

169

///

170

/// It will also open the associated data file if index and data are not

170

/// It will also open the associated data file if index and data are not

171

/// interleaved.

171

/// interleaved.

172

pub fn open(

172

pub fn open(

173

store_vfs: &Vfs,

173

store_vfs: &Vfs,

174

index_path: impl AsRef<Path>,

174

index_path: impl AsRef<Path>,

175

data_path: Option<&Path>,

175

data_path: Option<&Path>,

176

use_nodemap: bool,

176

use_nodemap: bool,

177

) -> Result<Self, HgError> {

177

) -> Result<Self, HgError> {

178

let index_path = index_path.as_ref();

178

let index_path = index_path.as_ref();

179

let index = {

179

let index = {

180

match store_vfs.mmap_open_opt(&index_path)? {

180

match store_vfs.mmap_open_opt(&index_path)? {

181

None => Index::new(Box::new(vec![])),

181

None => Index::new(Box::new(vec![])),

182

Some(index_mmap) => {

182

Some(index_mmap) => {

183

let index = Index::new(Box::new(index_mmap))?;

183

let index = Index::new(Box::new(index_mmap))?;

184

Ok(index)

184

Ok(index)

185

}

185

}

186

}

186

}

187

}?;

187

}?;

188

189

let default_data_path = index_path.with_extension("d");

189

let default_data_path = index_path.with_extension("d");

190

191

// type annotation required

191

// type annotation required

192

// won't recognize Mmap as Deref<Target = [u8]>

192

// won't recognize Mmap as Deref<Target = [u8]>

193

let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =

193

let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =

194

if index.is_inline() {

194

if index.is_inline() {

195

None

195

None

196

} else {

196

} else {

197

let data_path = data_path.unwrap_or(&default_data_path);

197

let data_path = data_path.unwrap_or(&default_data_path);

198

let data_mmap = store_vfs.mmap_open(data_path)?;

198

let data_mmap = store_vfs.mmap_open(data_path)?;

199

Some(Box::new(data_mmap))

199

Some(Box::new(data_mmap))

200

};

200

};

201

202

let nodemap = if index.is_inline() || !use_nodemap {

202

let nodemap = if index.is_inline() || !use_nodemap {

203

None

203

None

204

} else {

204

} else {

205

NodeMapDocket::read_from_file(store_vfs, index_path)?.map(

205

NodeMapDocket::read_from_file(store_vfs, index_path)?.map(

206

|(docket, data)| {

206

|(docket, data)| {

207

nodemap::NodeTree::load_bytes(

207

nodemap::NodeTree::load_bytes(

208

Box::new(data),

208

Box::new(data),

209

docket.data_length,

209

docket.data_length,

210

)

210

)

211

},

211

},

212

)

212

)

213

};

213

};

214

215

Ok(Revlog {

215

Ok(Revlog {

216

index,

216

index,

217

data_bytes,

217

data_bytes,

218

nodemap,

218

nodemap,

219

})

219

})

220

}

220

}

221

222

/// Return number of entries of the `Revlog`.

222

/// Return number of entries of the `Revlog`.

223

pub fn len(&self) -> usize {

223

pub fn len(&self) -> usize {

224

self.index.len()

224

self.index.len()

225

}

225

}

226

227

/// Returns `true` if the `Revlog` has zero `entries`.

227

/// Returns `true` if the `Revlog` has zero `entries`.

228

pub fn is_empty(&self) -> bool {

228

pub fn is_empty(&self) -> bool {

229

self.index.is_empty()

229

self.index.is_empty()

230

}

230

}

231

232

/// Returns the node ID for the given revision number, if it exists in this

232

/// Returns the node ID for the given revision number, if it exists in this

233

/// revlog

233

/// revlog

234

pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {

234

pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {

235

if rev == NULL_REVISION {

235

if rev == NULL_REVISION {

236

return Some(&NULL_NODE);

236

return Some(&NULL_NODE);

237

}

237

}

238

Some(self.index.get_entry(rev)?.hash())

238

Some(self.index.get_entry(rev)?.hash())

239

}

239

}

240

241

/// Return the revision number for the given node ID, if it exists in this

241

/// Return the revision number for the given node ID, if it exists in this

242

/// revlog

242

/// revlog

243

pub fn rev_from_node(

243

pub fn rev_from_node(

244

&self,

244

&self,

245

node: NodePrefix,

245

node: NodePrefix,

246

) -> Result<Revision, RevlogError> {

246

) -> Result<Revision, RevlogError> {

247

let looked_up = if let Some(nodemap) = &self.nodemap {

247

let looked_up = if let Some(nodemap) = &self.nodemap {

248

nodemap

248

nodemap

249

.find_bin(&self.index, node)?

249

.find_bin(&self.index, node)?

250

.ok_or(RevlogError::InvalidRevision)

250

.ok_or(RevlogError::InvalidRevision)

251

} else {

251

} else {

252

self.rev_from_node_no_persistent_nodemap(node)

252

self.rev_from_node_no_persistent_nodemap(node)

253

};

253

};

254

255

if node.is_prefix_of(&NULL_NODE) {

255

if node.is_prefix_of(&NULL_NODE) {

256

return match looked_up {

256

return match looked_up {

257

Ok(_) => Err(RevlogError::AmbiguousPrefix),

257

Ok(_) => Err(RevlogError::AmbiguousPrefix),

258

Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),

258

Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),

259

res => res,

259

res => res,

260

};

260

};

261

};

261

};

262

263

looked_up

263

looked_up

264

}

264

}

265

266

/// Same as `rev_from_node`, without using a persistent nodemap

266

/// Same as `rev_from_node`, without using a persistent nodemap

267

///

267

///

268

/// This is used as fallback when a persistent nodemap is not present.

268

/// This is used as fallback when a persistent nodemap is not present.

269

/// This happens when the persistent-nodemap experimental feature is not

269

/// This happens when the persistent-nodemap experimental feature is not

270

/// enabled, or for small revlogs.

270

/// enabled, or for small revlogs.

271

fn rev_from_node_no_persistent_nodemap(

271

fn rev_from_node_no_persistent_nodemap(

272

&self,

272

&self,

273

node: NodePrefix,

273

node: NodePrefix,

274

) -> Result<Revision, RevlogError> {

274

) -> Result<Revision, RevlogError> {

275

// Linear scan of the revlog

275

// Linear scan of the revlog

276

// TODO: consider building a non-persistent nodemap in memory to

276

// TODO: consider building a non-persistent nodemap in memory to

277

// optimize these cases.

277

// optimize these cases.

278

let mut found_by_prefix = None;

278

let mut found_by_prefix = None;

279

for rev in (0..self.len() as Revision).rev() {

279

for rev in (0..self.len() as Revision).rev() {

280

let index_entry = self.index.get_entry(rev).ok_or_else(|| {

280

let index_entry = self.index.get_entry(rev).ok_or_else(|| {

281

HgError::corrupted(

281

HgError::corrupted(

282

"revlog references a revision not in the index",

282

"revlog references a revision not in the index",

283

)

283

)

284

})?;

284

})?;

285

if node == *index_entry.hash() {

285

if node == *index_entry.hash() {

286

return Ok(rev);

286

return Ok(rev);

287

}

287

}

288

if node.is_prefix_of(index_entry.hash()) {

288

if node.is_prefix_of(index_entry.hash()) {

289

if found_by_prefix.is_some() {

289

if found_by_prefix.is_some() {

290

return Err(RevlogError::AmbiguousPrefix);

290

return Err(RevlogError::AmbiguousPrefix);

291

}

291

}

292

found_by_prefix = Some(rev)

292

found_by_prefix = Some(rev)

293

}

293

}

294

}

294

}

295

found_by_prefix.ok_or(RevlogError::InvalidRevision)

295

found_by_prefix.ok_or(RevlogError::InvalidRevision)

296

}

296

}

297

298

/// Returns whether the given revision exists in this revlog.

298

/// Returns whether the given revision exists in this revlog.

299

pub fn has_rev(&self, rev: Revision) -> bool {

299

pub fn has_rev(&self, rev: Revision) -> bool {

300

self.index.get_entry(rev).is_some()

300

self.index.get_entry(rev).is_some()

301

}

301

}

302

303

/// Return the full data associated to a revision.

303

/// Return the full data associated to a revision.

304

///

304

///

305

/// All entries required to build the final data out of deltas will be

305

/// All entries required to build the final data out of deltas will be

306

/// retrieved as needed, and the deltas will be applied to the inital

306

/// retrieved as needed, and the deltas will be applied to the inital

307

/// snapshot to rebuild the final data.

307

/// snapshot to rebuild the final data.

308

pub fn get_rev_data(

308

pub fn get_rev_data(

309

&self,

309

&self,

310

rev: Revision,

310

rev: Revision,

311

) -> Result<Cow<[u8]>, RevlogError> {

311

) -> Result<Cow<[u8]>, RevlogError> {

312

if rev == NULL_REVISION {

312

if rev == NULL_REVISION {

313

return Ok(Cow::Borrowed(&[]));

313

return Ok(Cow::Borrowed(&[]));

314

};

314

};

315

Ok(self.get_entry(rev)?.data()?)

315

Ok(self.get_entry(rev)?.data()?)

316

}

316

}

317

318

/// Check the hash of some given data against the recorded hash.

318

/// Check the hash of some given data against the recorded hash.

319

pub fn check_hash(

319

pub fn check_hash(

320

&self,

320

&self,

321

p1: Revision,

321

p1: Revision,

322

p2: Revision,

322

p2: Revision,

323

expected: &[u8],

323

expected: &[u8],

324

data: &[u8],

324

data: &[u8],

325

) -> bool {

325

) -> bool {

326

let e1 = self.index.get_entry(p1);

326

let e1 = self.index.get_entry(p1);

327

let h1 = match e1 {

327

let h1 = match e1 {

328

Some(ref entry) => entry.hash(),

328

Some(ref entry) => entry.hash(),

329

None => &NULL_NODE,

329

None => &NULL_NODE,

330

};

330

};

331

let e2 = self.index.get_entry(p2);

331

let e2 = self.index.get_entry(p2);

332

let h2 = match e2 {

332

let h2 = match e2 {

333

Some(ref entry) => entry.hash(),

333

Some(ref entry) => entry.hash(),

334

None => &NULL_NODE,

334

None => &NULL_NODE,

335

};

335

};

336

337

hash(data, h1.as_bytes(), h2.as_bytes()) == expected

337

hash(data, h1.as_bytes(), h2.as_bytes()) == expected

338

}

338

}

339

340

/// Build the full data of a revision out its snapshot

340

/// Build the full data of a revision out its snapshot

341

/// and its deltas.

341

/// and its deltas.

342

fn build_data_from_deltas(

342

fn build_data_from_deltas(

343

snapshot: RevlogEntry,

343

snapshot: RevlogEntry,

344

deltas: &[RevlogEntry],

344

deltas: &[RevlogEntry],

345

) -> Result<Vec<u8>, HgError> {

345

) -> Result<Vec<u8>, HgError> {

346

let snapshot = snapshot.data_chunk()?;

346

let snapshot = snapshot.data_chunk()?;

347

let deltas = deltas

347

let deltas = deltas

348

.iter()

348

.iter()

349

.rev()

349

.rev()

350

.map(RevlogEntry::data_chunk)

350

.map(RevlogEntry::data_chunk)

351

.collect::<Result<Vec<_>, _>>()?;

351

.collect::<Result<Vec<_>, _>>()?;

352

let patches: Vec<_> =

352

let patches: Vec<_> =

353

deltas.iter().map(|d| patch::PatchList::new(d)).collect();

353

deltas.iter().map(|d| patch::PatchList::new(d)).collect();

354

let patch = patch::fold_patch_lists(&patches);

354

let patch = patch::fold_patch_lists(&patches);

355

Ok(patch.apply(&snapshot))

355

Ok(patch.apply(&snapshot))

356

}

356

}

357

358

/// Return the revlog data.

358

/// Return the revlog data.

359

fn data(&self) -> &[u8] {

359

fn data(&self) -> &[u8] {

360

match &self.data_bytes {

360

match &self.data_bytes {

361

Some(data_bytes) => data_bytes,

361

Some(data_bytes) => data_bytes,

362

None => panic!(

362

None => panic!(

363

"forgot to load the data or trying to access inline data"

363

"forgot to load the data or trying to access inline data"

364

),

364

),

365

}

365

}

366

}

366

}

367

368

pub fn make_null_entry(&self) -> RevlogEntry {

368

pub fn make_null_entry(&self) -> RevlogEntry {

369

RevlogEntry {

369

RevlogEntry {

370

revlog: self,

370

revlog: self,

371

rev: NULL_REVISION,

371

rev: NULL_REVISION,

372

bytes: b"",

372

bytes: b"",

373

compressed_len: 0,

373

compressed_len: 0,

374

uncompressed_len: 0,

374

uncompressed_len: 0,

375

base_rev_or_base_of_delta_chain: None,

375

base_rev_or_base_of_delta_chain: None,

376

p1: NULL_REVISION,

376

p1: NULL_REVISION,

377

p2: NULL_REVISION,

377

p2: NULL_REVISION,

378

flags: NULL_REVLOG_ENTRY_FLAGS,

378

flags: NULL_REVLOG_ENTRY_FLAGS,

379

hash: NULL_NODE,

379

hash: NULL_NODE,

380

}

380

}

381

}

381

}

382

383

/// Get an entry of the revlog.

383

/// Get an entry of the revlog.

384

pub fn get_entry(

384

pub fn get_entry(

385

&self,

385

&self,

386

rev: Revision,

386

rev: Revision,

387

) -> Result<RevlogEntry, RevlogError> {

387

) -> Result<RevlogEntry, RevlogError> {

388

if rev == NULL_REVISION {

388

if rev == NULL_REVISION {

389

return Ok(self.make_null_entry());

389

return Ok(self.make_null_entry());

390

}

390

}

391

let index_entry = self

391

let index_entry = self

392

.index

392

.index

393

.get_entry(rev)

393

.get_entry(rev)

394

.ok_or(RevlogError::InvalidRevision)?;

394

.ok_or(RevlogError::InvalidRevision)?;

395

let start = index_entry.offset();

395

let start = index_entry.offset();

396

let end = start + index_entry.compressed_len() as usize;

396

let end = start + index_entry.compressed_len() as usize;

397

let data = if self.index.is_inline() {

397

let data = if self.index.is_inline() {

398

self.index.data(start, end)

398

self.index.data(start, end)

399

} else {

399

} else {

400

&self.data()[start..end]

400

&self.data()[start..end]

401

};

401

};

402

let entry = RevlogEntry {

402

let entry = RevlogEntry {

403

revlog: self,

403

revlog: self,

404

rev,

404

rev,

405

bytes: data,

405

bytes: data,

406

compressed_len: index_entry.compressed_len(),

406

compressed_len: index_entry.compressed_len(),

407

uncompressed_len: index_entry.uncompressed_len(),

407

uncompressed_len: index_entry.uncompressed_len(),

408

base_rev_or_base_of_delta_chain: if index_entry

408

base_rev_or_base_of_delta_chain: if index_entry

409

.base_revision_or_base_of_delta_chain()

409

.base_revision_or_base_of_delta_chain()

410

== rev

410

== rev

411

{

411

{

412

None

412

None

413

} else {

413

} else {

414

Some(index_entry.base_revision_or_base_of_delta_chain())

414

Some(index_entry.base_revision_or_base_of_delta_chain())

415

},

415

},

416

p1: index_entry.p1(),

416

p1: index_entry.p1(),

417

p2: index_entry.p2(),

417

p2: index_entry.p2(),

418

flags: index_entry.flags(),

418

flags: index_entry.flags(),

419

hash: *index_entry.hash(),

419

hash: *index_entry.hash(),

420

};

420

};

421

Ok(entry)

421

Ok(entry)

422

}

422

}

423

424

/// when resolving internal references within revlog, any errors

424

/// when resolving internal references within revlog, any errors

425

/// should be reported as corruption, instead of e.g. "invalid revision"

425

/// should be reported as corruption, instead of e.g. "invalid revision"

426

fn get_entry_internal(

426

fn get_entry_internal(

427

&self,

427

&self,

428

rev: Revision,

428

rev: Revision,

429

) -> Result<RevlogEntry, HgError> {

429

) -> Result<RevlogEntry, HgError> {

430

self.get_entry(rev)

430

self.get_entry(rev)

431

.map_err(|_| corrupted(format!("revision {} out of range", rev)))

431

.map_err(|_| corrupted(format!("revision {} out of range", rev)))

432

}

432

}

433

}

433

}

434

435

/// The revlog entry's bytes and the necessary informations to extract

435

/// The revlog entry's bytes and the necessary informations to extract

436

/// the entry's data.

436

/// the entry's data.

437

#[derive(Clone)]

437

#[derive(Clone)]

438

pub struct RevlogEntry<'revlog> {

438

pub struct RevlogEntry<'revlog> {

439

revlog: &'revlog Revlog,

439

revlog: &'revlog Revlog,

440

rev: Revision,

440

rev: Revision,

441

bytes: &'revlog [u8],

441

bytes: &'revlog [u8],

442

compressed_len: u32,

442

compressed_len: u32,

443

uncompressed_len: i32,

443

uncompressed_len: i32,

444

base_rev_or_base_of_delta_chain: Option<Revision>,

444

base_rev_or_base_of_delta_chain: Option<Revision>,

445

p1: Revision,

445

p1: Revision,

446

p2: Revision,

446

p2: Revision,

447

flags: u16,

447

flags: u16,

448

hash: Node,

448

hash: Node,

449

}

449

}

450

451

thread_local! {

451

thread_local! {

452

// seems fine to [unwrap] here: this can only fail due to memory allocation

452

// seems fine to [unwrap] here: this can only fail due to memory allocation

453

// failing, and it's normal for that to cause panic.

453

// failing, and it's normal for that to cause panic.

454

static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =

454

static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =

455

RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());

455

RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());

456

}

456

}

457

458

fn zstd_decompress_to_buffer(

458

fn zstd_decompress_to_buffer(

459

bytes: &[u8],

459

bytes: &[u8],

460

buf: &mut Vec<u8>,

460

buf: &mut Vec<u8>,

461

) -> Result<usize, std::io::Error> {

461

) -> Result<usize, std::io::Error> {

462

ZSTD_DECODER

462

ZSTD_DECODER

463

.with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))

463

.with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))

464

}

464

}

465

466

impl<'revlog> RevlogEntry<'revlog> {

466

impl<'revlog> RevlogEntry<'revlog> {

467

/// Revision number of this entry.
pub fn revision(&self) -> Revision {
    self.rev
}

470

471

/// Node ID (hash) recorded for this entry.
pub fn node(&self) -> &Node {
    &self.hash
}

474

475

pub fn uncompressed_len(&self) -> Option<u32> {

475

pub fn uncompressed_len(&self) -> Option<u32> {

476

u32::try_from(self.uncompressed_len).ok()

476

u32::try_from(self.uncompressed_len).ok()

477

}

477

}

478

479

pub fn has_p1(&self) -> bool {

479

pub fn has_p1(&self) -> bool {

480

self.p1 != NULL_REVISION

480

self.p1 != NULL_REVISION

481

}

481

}

482

483

pub fn p1_entry(

483

pub fn p1_entry(

484

&self,

484

&self,

485

) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {

485

) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {

486

if self.p1 == NULL_REVISION {

486

if self.p1 == NULL_REVISION {

487

Ok(None)

487

Ok(None)

488

} else {

488

} else {

489

Ok(Some(self.revlog.get_entry(self.p1)?))

489

Ok(Some(self.revlog.get_entry(self.p1)?))

490

}

490

}

491

}

491

}

492

493

pub fn p2_entry(

493

pub fn p2_entry(

494

&self,

494

&self,

495

) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {

495

) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {

496

if self.p2 == NULL_REVISION {

496

if self.p2 == NULL_REVISION {

497

Ok(None)

497

Ok(None)

498

} else {

498

} else {

499

Ok(Some(self.revlog.get_entry(self.p2)?))

499

Ok(Some(self.revlog.get_entry(self.p2)?))

500

}

500

}

501

}

501

}

502

503

pub fn p1(&self) -> Option<Revision> {

503

pub fn p1(&self) -> Option<Revision> {

504

if self.p1 == NULL_REVISION {

504

if self.p1 == NULL_REVISION {

505

None

505

None

506

} else {

506

} else {

507

Some(self.p1)

507

Some(self.p1)

508

}

508

}

509

}

509

}

510

511

pub fn p2(&self) -> Option<Revision> {

511

pub fn p2(&self) -> Option<Revision> {

512

if self.p2 == NULL_REVISION {

512

if self.p2 == NULL_REVISION {

513

None

513

None

514

} else {

514

} else {

515

Some(self.p2)

515

Some(self.p2)

516

}

516

}

517

}

517

}

518

519

pub fn is_censored(&self) -> bool {

519

pub fn is_censored(&self) -> bool {

520

(self.flags & REVISION_FLAG_CENSORED) != 0

520

(self.flags & REVISION_FLAG_CENSORED) != 0

521

}

521

}

522

523

pub fn has_length_affecting_flag_processor(&self) -> bool {

523

pub fn has_length_affecting_flag_processor(&self) -> bool {

524

// Relevant Python code: revlog.size()

524

// Relevant Python code: revlog.size()

525

// note: ELLIPSIS is known to not change the content

525

// note: ELLIPSIS is known to not change the content

526

(self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0

526

(self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0

527

}

527

}

528

529

/// The data for this entry, after resolving deltas if any.

529

/// The data for this entry, after resolving deltas if any.

530

pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

530

pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

531

let mut entry = self.clone();

531

let mut entry = self.clone();

532

let mut delta_chain = vec![];

532

let mut delta_chain = vec![];

533

534

// The meaning of `base_rev_or_base_of_delta_chain` depends on

534

// The meaning of `base_rev_or_base_of_delta_chain` depends on

535

// generaldelta. See the doc on `ENTRY_DELTA_BASE` in

535

// generaldelta. See the doc on `ENTRY_DELTA_BASE` in

536

// `mercurial/revlogutils/constants.py` and the code in

536

// `mercurial/revlogutils/constants.py` and the code in

537

// [_chaininfo] and in [index_deltachain].

537

// [_chaininfo] and in [index_deltachain].

538

let uses_generaldelta = self.revlog.index.uses_generaldelta();

538

let uses_generaldelta = self.revlog.index.uses_generaldelta();

539

while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {

539

while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {

540

~~let~~ ~~base_rev~~ = if uses_generaldelta {

540

entry = if uses_generaldelta {

541

~~base_rev~~

541

delta_chain.push(entry);

542

self.revlog.get_entry_internal(base_rev)?

542

} else {

543

} else {

543

entry.rev - 1

544

let base_rev = entry.rev - 1;

545

delta_chain.push(entry);

546

self.revlog.get_entry_internal(base_rev)?

544

};

547

};

545

delta_chain.push(entry);

546

entry = self.revlog.get_entry_internal(base_rev)?;

547

}

548

}

548

549

let data = if delta_chain.is_empty() {

550

let data = if delta_chain.is_empty() {

550

entry.data_chunk()?

551

entry.data_chunk()?

551

} else {

552

} else {

552

Revlog::build_data_from_deltas(entry, &delta_chain)?.into()

553

Revlog::build_data_from_deltas(entry, &delta_chain)?.into()

553

};

554

};

554

555

Ok(data)

556

Ok(data)

556

}

557

}

557

558

fn check_data(

559

fn check_data(

559

&self,

560

&self,

560

data: Cow<'revlog, [u8]>,

561

data: Cow<'revlog, [u8]>,

561

) -> Result<Cow<'revlog, [u8]>, HgError> {

562

) -> Result<Cow<'revlog, [u8]>, HgError> {

562

if self.revlog.check_hash(

563

if self.revlog.check_hash(

563

self.p1,

564

self.p1,

564

self.p2,

565

self.p2,

565

self.hash.as_bytes(),

566

self.hash.as_bytes(),

566

&data,

567

&data,

567

) {

568

) {

568

Ok(data)

569

Ok(data)

569

} else {

570

} else {

570

if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {

571

if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {

571

return Err(HgError::unsupported(

572

return Err(HgError::unsupported(

572

"ellipsis revisions are not supported by rhg",

573

"ellipsis revisions are not supported by rhg",

573

));

574

));

574

}

575

}

575

Err(corrupted(format!(

576

Err(corrupted(format!(

576

"hash check failed for revision {}",

577

"hash check failed for revision {}",

577

self.rev

578

self.rev

578

)))

579

)))

579

}

580

}

580

}

581

}

581

582

pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

583

pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

583

let data = self.rawdata()?;

584

let data = self.rawdata()?;

584

if self.rev == NULL_REVISION {

585

if self.rev == NULL_REVISION {

585

return Ok(data);

586

return Ok(data);

586

}

587

}

587

if self.is_censored() {

588

if self.is_censored() {

588

return Err(HgError::CensoredNodeError);

589

return Err(HgError::CensoredNodeError);

589

}

590

}

590

self.check_data(data)

591

self.check_data(data)

591

}

592

}

592

593

/// Extract the data contained in the entry.

594

/// Extract the data contained in the entry.

594

/// This may be a delta. (See `is_delta`.)

595

/// This may be a delta. (See `is_delta`.)

595

fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

596

fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {

596

if self.bytes.is_empty() {

597

if self.bytes.is_empty() {

597

return Ok(Cow::Borrowed(&[]));

598

return Ok(Cow::Borrowed(&[]));

598

}

599

}

599

match self.bytes[0] {

600

match self.bytes[0] {

600

// Revision data is the entirety of the entry, including this

601

// Revision data is the entirety of the entry, including this

601

// header.

602

// header.

602

b'\0' => Ok(Cow::Borrowed(self.bytes)),

603

b'\0' => Ok(Cow::Borrowed(self.bytes)),

603

// Raw revision data follows.

604

// Raw revision data follows.

604

b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),

605

b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),

605

// zlib (RFC 1950) data.

606

// zlib (RFC 1950) data.

606

b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),

607

b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),

607

// zstd data.

608

// zstd data.

608

b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),

609

b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),

609

// A proper new format should have had a repo/store requirement.

610

// A proper new format should have had a repo/store requirement.

610

format_type => Err(corrupted(format!(

611

format_type => Err(corrupted(format!(

611

"unknown compression header '{}'",

612

"unknown compression header '{}'",

612

format_type

613

format_type

613

))),

614

))),

614

}

615

}

615

}

616

}

616

617

fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {

618

fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {

618

let mut decoder = ZlibDecoder::new(self.bytes);

619

let mut decoder = ZlibDecoder::new(self.bytes);

619

if self.is_delta() {

620

if self.is_delta() {

620

let mut buf = Vec::with_capacity(self.compressed_len as usize);

621

let mut buf = Vec::with_capacity(self.compressed_len as usize);

621

decoder

622

decoder

622

.read_to_end(&mut buf)

623

.read_to_end(&mut buf)

623

.map_err(|e| corrupted(e.to_string()))?;

624

.map_err(|e| corrupted(e.to_string()))?;

624

Ok(buf)

625

Ok(buf)

625

} else {

626

} else {

626

let cap = self.uncompressed_len.max(0) as usize;

627

let cap = self.uncompressed_len.max(0) as usize;

627

let mut buf = vec![0; cap];

628

let mut buf = vec![0; cap];

628

decoder

629

decoder

629

.read_exact(&mut buf)

630

.read_exact(&mut buf)

630

.map_err(|e| corrupted(e.to_string()))?;

631

.map_err(|e| corrupted(e.to_string()))?;

631

Ok(buf)

632

Ok(buf)

632

}

633

}

633

}

634

}

634

635

fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {

636

fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {

636

let cap = self.uncompressed_len.max(0) as usize;

637

let cap = self.uncompressed_len.max(0) as usize;

637

if self.is_delta() {

638

if self.is_delta() {

638

// [cap] is usually an over-estimate of the space needed because

639

// [cap] is usually an over-estimate of the space needed because

639

// it's the length of delta-decoded data, but we're interested

640

// it's the length of delta-decoded data, but we're interested

640

// in the size of the delta.

641

// in the size of the delta.

641

// This means we have to [shrink_to_fit] to avoid holding on

642

// This means we have to [shrink_to_fit] to avoid holding on

642

// to a large chunk of memory, but it also means we must have a

643

// to a large chunk of memory, but it also means we must have a

643

// fallback branch, for the case when the delta is longer than

644

// fallback branch, for the case when the delta is longer than

644

// the original data (surprisingly, this does happen in practice)

645

// the original data (surprisingly, this does happen in practice)

645

let mut buf = Vec::with_capacity(cap);

646

let mut buf = Vec::with_capacity(cap);

646

match zstd_decompress_to_buffer(self.bytes, &mut buf) {

647

match zstd_decompress_to_buffer(self.bytes, &mut buf) {

647

Ok(_) => buf.shrink_to_fit(),

648

Ok(_) => buf.shrink_to_fit(),

648

Err(_) => {

649

Err(_) => {

649

buf.clear();

650

buf.clear();

650

zstd::stream::copy_decode(self.bytes, &mut buf)

651

zstd::stream::copy_decode(self.bytes, &mut buf)

651

.map_err(|e| corrupted(e.to_string()))?;

652

.map_err(|e| corrupted(e.to_string()))?;

652

}

653

}

653

};

654

};

654

Ok(buf)

655

Ok(buf)

655

} else {

656

} else {

656

let mut buf = Vec::with_capacity(cap);

657

let mut buf = Vec::with_capacity(cap);

657

let len = zstd_decompress_to_buffer(self.bytes, &mut buf)

658

let len = zstd_decompress_to_buffer(self.bytes, &mut buf)

658

.map_err(|e| corrupted(e.to_string()))?;

659

.map_err(|e| corrupted(e.to_string()))?;

659

if len != self.uncompressed_len as usize {

660

if len != self.uncompressed_len as usize {

660

Err(corrupted("uncompressed length does not match"))

661

Err(corrupted("uncompressed length does not match"))

661

} else {

662

} else {

662

Ok(buf)

663

Ok(buf)

663

}

664

}

664

}

665

}

665

}

666

}

666

667

/// Tell if the entry is a snapshot or a delta

668

/// Tell if the entry is a snapshot or a delta

668

/// (influences on decompression).

669

/// (influences on decompression).

669

fn is_delta(&self) -> bool {

670

fn is_delta(&self) -> bool {

670

self.base_rev_or_base_of_delta_chain.is_some()

671

self.base_rev_or_base_of_delta_chain.is_some()

671

}

672

}

672

}

673

}

673

674

/// Calculate the hash of a revision given its data and its parents.

675

/// Calculate the hash of a revision given its data and its parents.

675

fn hash(

676

fn hash(

676

data: &[u8],

677

data: &[u8],

677

p1_hash: &[u8],

678

p1_hash: &[u8],

678

p2_hash: &[u8],

679

p2_hash: &[u8],

679

) -> [u8; NODE_BYTES_LENGTH] {

680

) -> [u8; NODE_BYTES_LENGTH] {

680

let mut hasher = Sha1::new();

681

let mut hasher = Sha1::new();

681

let (a, b) = (p1_hash, p2_hash);

682

let (a, b) = (p1_hash, p2_hash);

682

if a > b {

683

if a > b {

683

hasher.update(b);

684

hasher.update(b);

684

hasher.update(a);

685

hasher.update(a);

685

} else {

686

} else {

686

hasher.update(a);

687

hasher.update(a);

687

hasher.update(b);

688

hasher.update(b);

688

}

689

}

689

hasher.update(data);

690

hasher.update(data);

690

*hasher.finalize().as_ref()

691

*hasher.finalize().as_ref()

691

}

692

}

692

693

#[cfg(test)]

694

#[cfg(test)]

694

mod tests {

695

mod tests {

695

use super::*;

696

use super::*;

696

use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};

697

use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};

697

use itertools::Itertools;

698

use itertools::Itertools;

698

699

#[test]

700

#[test]

700

fn test_empty() {

701

fn test_empty() {

701

let temp = tempfile::tempdir().unwrap();

702

let temp = tempfile::tempdir().unwrap();

702

let vfs = Vfs { base: temp.path() };

703

let vfs = Vfs { base: temp.path() };

703

std::fs::write(temp.path().join("foo.i"), b"").unwrap();

704

std::fs::write(temp.path().join("foo.i"), b"").unwrap();

704

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

705

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

705

assert!(revlog.is_empty());

706

assert!(revlog.is_empty());

706

assert_eq!(revlog.len(), 0);

707

assert_eq!(revlog.len(), 0);

707

assert!(revlog.get_entry(0).is_err());

708

assert!(revlog.get_entry(0).is_err());

708

assert!(!revlog.has_rev(0));

709

assert!(!revlog.has_rev(0));

709

assert_eq!(

710

assert_eq!(

710

revlog.rev_from_node(NULL_NODE.into()).unwrap(),

711

revlog.rev_from_node(NULL_NODE.into()).unwrap(),

711

NULL_REVISION

712

NULL_REVISION

712

);

713

);

713

let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();

714

let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();

714

assert_eq!(null_entry.revision(), NULL_REVISION);

715

assert_eq!(null_entry.revision(), NULL_REVISION);

715

assert!(null_entry.data().unwrap().is_empty());

716

assert!(null_entry.data().unwrap().is_empty());

716

}

717

}

717

718

#[test]

719

#[test]

719

fn test_inline() {

720

fn test_inline() {

720

let temp = tempfile::tempdir().unwrap();

721

let temp = tempfile::tempdir().unwrap();

721

let vfs = Vfs { base: temp.path() };

722

let vfs = Vfs { base: temp.path() };

722

let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")

723

let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")

723

.unwrap();

724

.unwrap();

724

let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")

725

let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")

725

.unwrap();

726

.unwrap();

726

let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")

727

let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")

727

.unwrap();

728

.unwrap();

728

let entry0_bytes = IndexEntryBuilder::new()

729

let entry0_bytes = IndexEntryBuilder::new()

729

.is_first(true)

730

.is_first(true)

730

.with_version(1)

731

.with_version(1)

731

.with_inline(true)

732

.with_inline(true)

732

.with_offset(INDEX_ENTRY_SIZE)

733

.with_offset(INDEX_ENTRY_SIZE)

733

.with_node(node0)

734

.with_node(node0)

734

.build();

735

.build();

735

let entry1_bytes = IndexEntryBuilder::new()

736

let entry1_bytes = IndexEntryBuilder::new()

736

.with_offset(INDEX_ENTRY_SIZE)

737

.with_offset(INDEX_ENTRY_SIZE)

737

.with_node(node1)

738

.with_node(node1)

738

.build();

739

.build();

739

let entry2_bytes = IndexEntryBuilder::new()

740

let entry2_bytes = IndexEntryBuilder::new()

740

.with_offset(INDEX_ENTRY_SIZE)

741

.with_offset(INDEX_ENTRY_SIZE)

741

.with_p1(0)

742

.with_p1(0)

742

.with_p2(1)

743

.with_p2(1)

743

.with_node(node2)

744

.with_node(node2)

744

.build();

745

.build();

745

let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]

746

let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]

746

.into_iter()

747

.into_iter()

747

.flatten()

748

.flatten()

748

.collect_vec();

749

.collect_vec();

749

std::fs::write(temp.path().join("foo.i"), contents).unwrap();

750

std::fs::write(temp.path().join("foo.i"), contents).unwrap();

750

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

751

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

751

752

let entry0 = revlog.get_entry(0).ok().unwrap();

753

let entry0 = revlog.get_entry(0).ok().unwrap();

753

assert_eq!(entry0.revision(), 0);

754

assert_eq!(entry0.revision(), 0);

754

assert_eq!(*entry0.node(), node0);

755

assert_eq!(*entry0.node(), node0);

755

assert!(!entry0.has_p1());

756

assert!(!entry0.has_p1());

756

assert_eq!(entry0.p1(), None);

757

assert_eq!(entry0.p1(), None);

757

assert_eq!(entry0.p2(), None);

758

assert_eq!(entry0.p2(), None);

758

let p1_entry = entry0.p1_entry().unwrap();

759

let p1_entry = entry0.p1_entry().unwrap();

759

assert!(p1_entry.is_none());

760

assert!(p1_entry.is_none());

760

let p2_entry = entry0.p2_entry().unwrap();

761

let p2_entry = entry0.p2_entry().unwrap();

761

assert!(p2_entry.is_none());

762

assert!(p2_entry.is_none());

762

763

let entry1 = revlog.get_entry(1).ok().unwrap();

764

let entry1 = revlog.get_entry(1).ok().unwrap();

764

assert_eq!(entry1.revision(), 1);

765

assert_eq!(entry1.revision(), 1);

765

assert_eq!(*entry1.node(), node1);

766

assert_eq!(*entry1.node(), node1);

766

assert!(!entry1.has_p1());

767

assert!(!entry1.has_p1());

767

assert_eq!(entry1.p1(), None);

768

assert_eq!(entry1.p1(), None);

768

assert_eq!(entry1.p2(), None);

769

assert_eq!(entry1.p2(), None);

769

let p1_entry = entry1.p1_entry().unwrap();

770

let p1_entry = entry1.p1_entry().unwrap();

770

assert!(p1_entry.is_none());

771

assert!(p1_entry.is_none());

771

let p2_entry = entry1.p2_entry().unwrap();

772

let p2_entry = entry1.p2_entry().unwrap();

772

assert!(p2_entry.is_none());

773

assert!(p2_entry.is_none());

773

774

let entry2 = revlog.get_entry(2).ok().unwrap();

775

let entry2 = revlog.get_entry(2).ok().unwrap();

775

assert_eq!(entry2.revision(), 2);

776

assert_eq!(entry2.revision(), 2);

776

assert_eq!(*entry2.node(), node2);

777

assert_eq!(*entry2.node(), node2);

777

assert!(entry2.has_p1());

778

assert!(entry2.has_p1());

778

assert_eq!(entry2.p1(), Some(0));

779

assert_eq!(entry2.p1(), Some(0));

779

assert_eq!(entry2.p2(), Some(1));

780

assert_eq!(entry2.p2(), Some(1));

780

let p1_entry = entry2.p1_entry().unwrap();

781

let p1_entry = entry2.p1_entry().unwrap();

781

assert!(p1_entry.is_some());

782

assert!(p1_entry.is_some());

782

assert_eq!(p1_entry.unwrap().revision(), 0);

783

assert_eq!(p1_entry.unwrap().revision(), 0);

783

let p2_entry = entry2.p2_entry().unwrap();

784

let p2_entry = entry2.p2_entry().unwrap();

784

assert!(p2_entry.is_some());

785

assert!(p2_entry.is_some());

785

assert_eq!(p2_entry.unwrap().revision(), 1);

786

assert_eq!(p2_entry.unwrap().revision(), 1);

786

}

787

}

787

788

#[test]

789

#[test]

789

fn test_nodemap() {

790

fn test_nodemap() {

790

let temp = tempfile::tempdir().unwrap();

791

let temp = tempfile::tempdir().unwrap();

791

let vfs = Vfs { base: temp.path() };

792

let vfs = Vfs { base: temp.path() };

792

793

// building a revlog with a forced Node starting with zeros

794

// building a revlog with a forced Node starting with zeros

794

// This is a corruption, but it does not preclude using the nodemap

795

// This is a corruption, but it does not preclude using the nodemap

795

// if we don't try and access the data

796

// if we don't try and access the data

796

let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")

797

let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")

797

.unwrap();

798

.unwrap();

798

let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")

799

let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")

799

.unwrap();

800

.unwrap();

800

let entry0_bytes = IndexEntryBuilder::new()

801

let entry0_bytes = IndexEntryBuilder::new()

801

.is_first(true)

802

.is_first(true)

802

.with_version(1)

803

.with_version(1)

803

.with_inline(true)

804

.with_inline(true)

804

.with_offset(INDEX_ENTRY_SIZE)

805

.with_offset(INDEX_ENTRY_SIZE)

805

.with_node(node0)

806

.with_node(node0)

806

.build();

807

.build();

807

let entry1_bytes = IndexEntryBuilder::new()

808

let entry1_bytes = IndexEntryBuilder::new()

808

.with_offset(INDEX_ENTRY_SIZE)

809

.with_offset(INDEX_ENTRY_SIZE)

809

.with_node(node1)

810

.with_node(node1)

810

.build();

811

.build();

811

let contents = vec![entry0_bytes, entry1_bytes]

812

let contents = vec![entry0_bytes, entry1_bytes]

812

.into_iter()

813

.into_iter()

813

.flatten()

814

.flatten()

814

.collect_vec();

815

.collect_vec();

815

std::fs::write(temp.path().join("foo.i"), contents).unwrap();

816

std::fs::write(temp.path().join("foo.i"), contents).unwrap();

816

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

817

let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

817

818

// accessing the data shows the corruption

819

// accessing the data shows the corruption

819

revlog.get_entry(0).unwrap().data().unwrap_err();

820

revlog.get_entry(0).unwrap().data().unwrap_err();

820

821

assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);

822

assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);

822

assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);

823

assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);

823

assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);

824

assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);

824

assert_eq!(

825

assert_eq!(

825

revlog

826

revlog

826

.rev_from_node(NodePrefix::from_hex("000").unwrap())

827

.rev_from_node(NodePrefix::from_hex("000").unwrap())

827

.unwrap(),

828

.unwrap(),

828

-1

829

-1

829

);

830

);

830

assert_eq!(

831

assert_eq!(

831

revlog

832

revlog

832

.rev_from_node(NodePrefix::from_hex("b00").unwrap())

833

.rev_from_node(NodePrefix::from_hex("b00").unwrap())

833

.unwrap(),

834

.unwrap(),

834

1

835

1

835

);

836

);

836

// RevlogError does not implement PartialEq

837

// RevlogError does not implement PartialEq

837

// (ultimately because io::Error does not)

838

// (ultimately because io::Error does not)

838

match revlog

839

match revlog

839

.rev_from_node(NodePrefix::from_hex("00").unwrap())

840

.rev_from_node(NodePrefix::from_hex("00").unwrap())

840

.expect_err("Expected to give AmbiguousPrefix error")

841

.expect_err("Expected to give AmbiguousPrefix error")

841

{

842

{

842

RevlogError::AmbiguousPrefix => (),

843

RevlogError::AmbiguousPrefix => (),

843

e => {

844

e => {

844

panic!("Got another error than AmbiguousPrefix: {:?}", e);

845

panic!("Got another error than AmbiguousPrefix: {:?}", e);

845

}

846

}

846

};

847

};

847

}

848

}

848

}

849

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Mercurial concepts for handling revision history
             pub mod node;
             pub mod nodemap;
             mod nodemap_docket;
             pub mod path_encode;
             pub use node::{FromHexError, Node, NodePrefix};
             pub mod changelog;
             pub mod filelog;
             pub mod index;
             pub mod manifest;
             pub mod patch;
             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use flate2::read::ZlibDecoder;
             use sha1::{Digest, Sha1};
             use std::cell::RefCell;
             use zstd;
             use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
             use self::nodemap_docket::NodeMapDocket;
             use super::index::Index;
             use super::nodemap::{NodeMap, NodeMapError};
             use crate::errors::HgError;
             use crate::vfs::Vfs;
             /// Mercurial revision numbers
             ///
             /// As noted in revlog.c, revision numbers are actually encoded in
             /// 4 bytes, and are liberally converted to ints, whence the i32
             pub type Revision = i32;
             /// Unchecked Mercurial revision numbers.
             ///
             /// Values of this type have no guarantee of being a valid revision number
             /// in any context. Use method `check_revision` to get a valid revision within
             /// the appropriate index object.
             ///
             /// As noted in revlog.c, revision numbers are actually encoded in
             /// 4 bytes, and are liberally converted to ints, whence the i32
             pub type UncheckedRevision = i32;
             /// Marker expressing the absence of a parent
             ///
             /// Independently of the actual representation, `NULL_REVISION` is guaranteed
             /// to be smaller than all existing revisions.
             pub const NULL_REVISION: Revision = -1;
             /// Same as `mercurial.node.wdirrev`
             ///
             /// This is also equal to `i32::MAX`, but it's better to spell
             /// it out explicitly, same as in `mercurial.node`
             #[allow(clippy::unreadable_literal)]
             pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
             /// Hexadecimal node ID made only of `f` digits.
             ///
             /// NOTE(review): presumably the node ID counterpart of
             /// `WORKING_DIRECTORY_REVISION` (`mercurial.node.wdirhex`) -- confirm.
             pub const WORKING_DIRECTORY_HEX: &str =
                 "ffffffffffffffffffffffffffffffffffffffff";
             /// The simplest expression of what we need of Mercurial DAGs.
             ///
             /// Implementors only have to answer one question: what are the parents
             /// of a given revision.
             pub trait Graph {
                 /// Return the two parents of the given `Revision`.
                 ///
                 /// Each of the parents can be independently `NULL_REVISION`
                 ///
                 /// Failures are reported as a [`GraphError`].
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
             }
             /// Error type returned by [`Graph::parents`].
             #[derive(Clone, Debug, PartialEq)]
             pub enum GraphError {
                 /// Carries the offending revision number.
                 ///
                 /// NOTE(review): judging by the name, raised when a parent revision
                 /// is outside of the range known to the graph -- the code producing
                 /// it is not visible from here, confirm.
                 ParentOutOfRange(Revision),
             }
             /// Minimal view of a Mercurial revlog index
             ///
             /// Only the operations required by the [`nodemap`](nodemap/index.html)
             /// module are exposed for now.
             pub trait RevlogIndex {
                 /// Number of revisions this index refers to
                 fn len(&self) -> usize;
                 /// Tell whether this index refers to no revision at all
                 fn is_empty(&self) -> bool {
                     let count = self.len();
                     count == 0
                 }
                 /// Node of the given revision, or `None` if `rev` is out of bounds
                 ///
                 /// `NULL_REVISION` does not count as out of bounds.
                 fn node(&self, rev: Revision) -> Option<&Node>;
                 /// Turn an unchecked revision number into a [`Revision`], provided it
                 /// is either `NULL_REVISION` or a valid position in this index
                 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
                     if rev == NULL_REVISION {
                         return Some(rev);
                     }
                     // Negative numbers other than `NULL_REVISION` do not convert
                     match usize::try_from(rev) {
                         Ok(position) if position < self.len() => Some(rev),
                         _ => None,
                     }
                 }
             }
             // Individual bits of the 16-bit `flags` value carried by each revlog entry.
             // Within this file only CENSORED and ELLIPSIS are tested one by one;
             // the other two only take part in `REVIDX_KNOWN_FLAGS`.
             const REVISION_FLAG_CENSORED: u16 = 1 << 15;
             const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
             const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
             const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
             // Keep this in sync with REVIDX_KNOWN_FLAGS in
             // mercurial/revlogutils/flagutil.py
             const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
                 | REVISION_FLAG_ELLIPSIS
                 | REVISION_FLAG_EXTSTORED
                 | REVISION_FLAG_HASCOPIESINFO;
             // Flags given to the synthetic entry standing for the null revision
             const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
             /// Errors reported by the read-only [`Revlog`] API.
             #[derive(Debug, derive_more::From)]
             pub enum RevlogError {
                 /// No entry matches the requested revision number or node prefix
                 InvalidRevision,
                 /// Working directory is not supported
                 WDirUnsupported,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
                 /// Any other failure, carried as an [`HgError`] (this includes revlog
                 /// corruption, see `RevlogError::corrupted`)
                 #[from]
                 Other(HgError),
             }
             /// Translate nodemap lookup failures into revlog errors: several matches
             /// become an ambiguous prefix, a dangling revision is reported as
             /// corruption.
             impl From<NodeMapError> for RevlogError {
                 fn from(error: NodeMapError) -> Self {
                     match error {
                         NodeMapError::RevisionNotInIndex(rev) => {
                             let context = format!(
                                 "nodemap point to revision {} not in index",
                                 rev
                             );
                             Self::corrupted(context)
                         }
                         NodeMapError::MultipleResults => Self::AmbiguousPrefix,
                     }
                 }
             }
             /// Build the [`HgError`] describing a corrupted revlog; `context` is
             /// appended to the `corrupted revlog, ` prefix.
             fn corrupted<S: AsRef<str>>(context: S) -> HgError {
                 let message = format!("corrupted revlog, {}", context.as_ref());
                 HgError::corrupted(message)
             }
             impl RevlogError {
                 /// Same as the free function [`corrupted`], wrapped in
                 /// [`RevlogError::Other`].
                 fn corrupted<S: AsRef<str>>(context: S) -> Self {
                     let inner = corrupted(context);
                     Self::Other(inner)
                 }
             }
             /// Read only implementation of revlog.
             ///
             /// Built with [`Revlog::open`]; entries are then obtained with
             /// [`Revlog::get_entry`].
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data
                 /// (`None` when the index is inline)
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog
                 /// (only loaded if requested at opening time and the index is not
                 /// inline)
                 nodemap: Option<nodemap::NodeTree>,
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 pub fn open(
                     store_vfs: &Vfs,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                     use_nodemap: bool,
                 ) -> Result<Self, HgError> {
                     let index_path = index_path.as_ref();
                     let index = {
                         match store_vfs.mmap_open_opt(&index_path)? {
                             None => Index::new(Box::new(vec![])),
                             Some(index_mmap) => {
                                 let index = Index::new(Box::new(index_mmap))?;
                                 Ok(index)
                             }
                         }
                     }?;
                     let default_data_path = index_path.with_extension("d");
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if index.is_inline() {
                             None
                         } else {
                             let data_path = data_path.unwrap_or(&default_data_path);
                             let data_mmap = store_vfs.mmap_open(data_path)?;
                             Some(Box::new(data_mmap))
                         };
                     let nodemap = if index.is_inline() || !use_nodemap {
                         None
                     } else {
                         NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                             |(docket, data)| {
                                 nodemap::NodeTree::load_bytes(
                                     Box::new(data),
                                     docket.data_length,
                                 )
                             },
                         )
                     };
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap,
                     })
                 }
                 /// Return number of entries of the `Revlog`.
                 ///
                 /// This is the length reported by the underlying index.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Returns `true` if the `Revlog` has zero `entries`.
                 ///
                 /// The answer is delegated to the underlying index.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Look up the node ID recorded for a revision number.
                 ///
                 /// `NULL_REVISION` maps to `NULL_NODE`; `None` is returned when the
                 /// index has no entry for `rev`.
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     let node = if rev == NULL_REVISION {
                         &NULL_NODE
                     } else {
                         self.index.get_entry(rev)?.hash()
                     };
                     Some(node)
                 }
                 /// Resolve a node ID, or a prefix of one, to its revision number.
                 ///
                 /// The persistent nodemap is used when it was loaded, a linear scan
                 /// of the index otherwise. A prefix of the null node resolves to
                 /// `NULL_REVISION` if nothing else matches it, and is ambiguous if
                 /// a revision of this revlog matches it too.
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     let found = match &self.nodemap {
                         Some(nodemap) => match nodemap.find_bin(&self.index, node)? {
                             Some(rev) => Ok(rev),
                             None => Err(RevlogError::InvalidRevision),
                         },
                         None => self.rev_from_node_no_persistent_nodemap(node),
                     };
                     if !node.is_prefix_of(&NULL_NODE) {
                         return found;
                     }
                     // From here on, the null node is one of the candidates
                     match found {
                         Ok(_) => Err(RevlogError::AmbiguousPrefix),
                         Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
                         other => other,
                     }
                 }
                 /// Fallback for `rev_from_node` when no persistent nodemap is loaded
                 ///
                 /// That is the case when the persistent-nodemap experimental feature
                 /// is not enabled, or for small revlogs.
                 ///
                 /// The index is walked from the last revision down to the first: a
                 /// full match returns at once, whereas a second prefix match is
                 /// reported as `AmbiguousPrefix`.
                 fn rev_from_node_no_persistent_nodemap(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let revision_count = self.len() as Revision;
                     let mut prefix_match: Option<Revision> = None;
                     for rev in (0..revision_count).rev() {
                         let index_entry = match self.index.get_entry(rev) {
                             Some(found) => found,
                             None => {
                                 return Err(HgError::corrupted(
                                     "revlog references a revision not in the index",
                                 )
                                 .into())
                             }
                         };
                         let candidate = index_entry.hash();
                         if node == *candidate {
                             return Ok(rev);
                         }
                         if !node.is_prefix_of(candidate) {
                             continue;
                         }
                         if prefix_match.is_some() {
                             return Err(RevlogError::AmbiguousPrefix);
                         }
                         prefix_match = Some(rev);
                     }
                     match prefix_match {
                         Some(rev) => Ok(rev),
                         None => Err(RevlogError::InvalidRevision),
                     }
                 }
                 /// Returns whether the given revision exists in this revlog.
                 ///
                 /// A revision exists if the index can produce an entry for it.
                 pub fn has_rev(&self, rev: Revision) -> bool {
                     self.index.get_entry(rev).is_some()
                 }
                 /// Return the complete data of a revision.
                 ///
                 /// The entries needed to rebuild it are fetched on demand and the
                 /// deltas are applied on top of the initial snapshot. The data of
                 /// `NULL_REVISION` is empty.
                 pub fn get_rev_data(
                     &self,
                     rev: Revision,
                 ) -> Result<Cow<[u8]>, RevlogError> {
                     if rev != NULL_REVISION {
                         let entry = self.get_entry(rev)?;
                         let data = entry.data()?;
                         Ok(data)
                     } else {
                         Ok(Cow::Borrowed(&[]))
                     }
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                 }
                 /// Rebuild the full data of a revision from the chunk of `snapshot`
                 /// and the chunks of `deltas`.
                 ///
                 /// `deltas` is consumed in reverse order: its last element is the
                 /// first patch of the folded list.
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, HgError> {
                     let base_text = snapshot.data_chunk()?;
                     let mut delta_chunks = Vec::with_capacity(deltas.len());
                     for delta in deltas.iter().rev() {
                         delta_chunks.push(delta.data_chunk()?);
                     }
                     let patch_lists: Vec<_> = delta_chunks
                         .iter()
                         .map(|chunk| patch::PatchList::new(chunk))
                         .collect();
                     let folded = patch::fold_patch_lists(&patch_lists);
                     Ok(folded.apply(&base_text))
                 }
                 /// Return the revlog data.
                 ///
                 /// # Panics
                 ///
                 /// Panics if `data_bytes` is `None`, i.e. no separate data file was
                 /// loaded. Callers must go through the index for inline revlogs.
                 fn data(&self) -> &[u8] {
                     match &self.data_bytes {
                         Some(data_bytes) => data_bytes,
                         None => panic!(
                             "forgot to load the data or trying to access inline data"
                         ),
                     }
                 }
                 /// Build the synthetic entry standing for `NULL_REVISION`: no bytes,
                 /// zero lengths, no delta base, null parents, no flags and the null
                 /// node ID.
                 pub fn make_null_entry(&self) -> RevlogEntry {
                     RevlogEntry {
                         revlog: self,
                         rev: NULL_REVISION,
                         bytes: b"",
                         compressed_len: 0,
                         uncompressed_len: 0,
                         base_rev_or_base_of_delta_chain: None,
                         p1: NULL_REVISION,
                         p2: NULL_REVISION,
                         flags: NULL_REVLOG_ENTRY_FLAGS,
                         hash: NULL_NODE,
                     }
                 }
                 /// Get an entry of the revlog.
                 ///
                 /// `NULL_REVISION` yields the synthetic entry built by
                 /// `make_null_entry`.
                 ///
                 /// # Errors
                 ///
                 /// - `RevlogError::InvalidRevision` if the index has no entry for
                 ///   `rev`.
                 /// - a corruption error (`RevlogError::Other`) if the byte range
                 ///   recorded in the index overflows or, for a non-inline revlog,
                 ///   does not fit in the data file.
                 pub fn get_entry(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     if rev == NULL_REVISION {
                         return Ok(self.make_null_entry());
                     }
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     let start = index_entry.offset();
                     let compressed_len = index_entry.compressed_len();
                     // The index is read from disk: a bogus offset/length pair must
                     // be reported as corruption instead of overflowing or panicking.
                     let end = start
                         .checked_add(compressed_len as usize)
                         .ok_or_else(|| {
                             RevlogError::corrupted(format!(
                                 "data range of revision {} overflows",
                                 rev
                             ))
                         })?;
                     let data = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         self.data().get(start..end).ok_or_else(|| {
                             RevlogError::corrupted(format!(
                                 "data of revision {} is out of the data file bounds",
                                 rev
                             ))
                         })?
                     };
                     let delta_base = index_entry.base_revision_or_base_of_delta_chain();
                     let entry = RevlogEntry {
                         revlog: self,
                         rev,
                         bytes: data,
                         compressed_len,
                         uncompressed_len: index_entry.uncompressed_len(),
                         // An entry recorded as its own base is not a delta
                         base_rev_or_base_of_delta_chain: if delta_base == rev {
                             None
                         } else {
                             Some(delta_base)
                         },
                         p1: index_entry.p1(),
                         p2: index_entry.p2(),
                         flags: index_entry.flags(),
                         hash: *index_entry.hash(),
                     };
                     Ok(entry)
                 }
                 /// Variant of `get_entry` for references found inside the revlog
                 /// itself: whatever goes wrong is reported as corruption rather than
                 /// as e.g. "invalid revision".
                 fn get_entry_internal(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, HgError> {
                     match self.get_entry(rev) {
                         Ok(entry) => Ok(entry),
                         Err(_) => {
                             Err(corrupted(format!("revision {} out of range", rev)))
                         }
                     }
                 }
             }
             /// The revlog entry's bytes and the necessary informations to extract
             /// the entry's data.
             #[derive(Clone)]
             pub struct RevlogEntry<'revlog> {
                 // The revlog this entry was read from
                 revlog: &'revlog Revlog,
                 // Revision number of this entry
                 rev: Revision,
                 // Stored bytes of the entry, compression header included
                 bytes: &'revlog [u8],
                 // Lengths as recorded in the index
                 compressed_len: u32,
                 uncompressed_len: i32,
                 // `None` when the index records this entry as its own base; see
                 // `rawdata` for how the value is interpreted otherwise
                 base_rev_or_base_of_delta_chain: Option<Revision>,
                 // Parent revisions, `NULL_REVISION` when absent
                 p1: Revision,
                 p2: Revision,
                 // Bit set of `REVISION_FLAG_*` values
                 flags: u16,
                 // Node ID of this entry
                 hash: Node,
             }
             // One zstd decompressor per thread, used by `zstd_decompress_to_buffer`.
             // The `RefCell` gives mutable access to the thread-local value.
             thread_local! {
               // seems fine to [unwrap] here: this can only fail due to memory allocation
               // failing, and it's normal for that to cause panic.
               static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
                   RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
             }
             /// Decompress `bytes` into `buf` with the thread-local zstd decompressor
             /// and hand back whatever `decompress_to_buffer` returns.
             fn zstd_decompress_to_buffer(
                 bytes: &[u8],
                 buf: &mut Vec<u8>,
             ) -> Result<usize, std::io::Error> {
                 ZSTD_DECODER.with(|cell| {
                     let mut decompressor = cell.borrow_mut();
                     decompressor.decompress_to_buffer(bytes, buf)
                 })
             }
             impl<'revlog> RevlogEntry<'revlog> {
                 /// Revision number of this entry.
                 pub fn revision(&self) -> Revision {
                     self.rev
                 }
                 /// Node ID of this entry.
                 pub fn node(&self) -> &Node {
                     &self.hash
                 }
                 /// Uncompressed length as recorded for this entry, or `None` if the
                 /// recorded value is negative and thus does not fit in a `u32`.
                 pub fn uncompressed_len(&self) -> Option<u32> {
                     u32::try_from(self.uncompressed_len).ok()
                 }
                 /// Tell whether the first parent is something else than
                 /// `NULL_REVISION`.
                 pub fn has_p1(&self) -> bool {
                     self.p1 != NULL_REVISION
                 }
                 /// Entry of the first parent, `None` if that parent is
                 /// `NULL_REVISION`.
                 pub fn p1_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p1 != NULL_REVISION {
                         let parent = self.revlog.get_entry(self.p1)?;
                         return Ok(Some(parent));
                     }
                     Ok(None)
                 }
                 /// Entry of the second parent, `None` if that parent is
                 /// `NULL_REVISION`.
                 pub fn p2_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p2 != NULL_REVISION {
                         let parent = self.revlog.get_entry(self.p2)?;
                         return Ok(Some(parent));
                     }
                     Ok(None)
                 }
                 /// Revision number of the first parent, `None` standing for
                 /// `NULL_REVISION`.
                 pub fn p1(&self) -> Option<Revision> {
                     Some(self.p1).filter(|&parent| parent != NULL_REVISION)
                 }
                 /// Revision number of the second parent, `None` standing for
                 /// `NULL_REVISION`.
                 pub fn p2(&self) -> Option<Revision> {
                     Some(self.p2).filter(|&parent| parent != NULL_REVISION)
                 }
                 /// Tell whether the `REVISION_FLAG_CENSORED` bit is set in the flags
                 /// of this entry.
                 pub fn is_censored(&self) -> bool {
                     (self.flags & REVISION_FLAG_CENSORED) != 0
                 }
                 /// Tell whether this entry carries any known flag other than
                 /// ELLIPSIS.
                 pub fn has_length_affecting_flag_processor(&self) -> bool {
                     // Relevant Python code: revlog.size()
                     // note: ELLIPSIS is known to not change the content
                     (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
                 }
                 /// The data for this entry, after resolving deltas if any.
                 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let mut entry = self.clone();
                     let mut delta_chain = vec![];
                     // The meaning of `base_rev_or_base_of_delta_chain` depends on
                     // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
                     // `mercurial/revlogutils/constants.py` and the code in
                     // [_chaininfo] and in [index_deltachain].
                     let uses_generaldelta = self.revlog.index.uses_generaldelta();
                     while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
-                        let base_rev = if uses_generaldelta {
+                        entry = if uses_generaldelta {
-                            base_rev
+                            delta_chain.push(entry);
+                            self.revlog.get_entry_internal(base_rev)?
                         } else {
-                            entry.rev - 1
+                            let base_rev = entry.rev - 1;
+                            delta_chain.push(entry);
+                            self.revlog.get_entry_internal(base_rev)?
                         };
-                        delta_chain.push(entry);
-                        entry = self.revlog.get_entry_internal(base_rev)?;
                     }
                     let data = if delta_chain.is_empty() {
                         entry.data_chunk()?
                     } else {
                         Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
                     };
                     Ok(data)
                 }
                 /// Hand `data` back if it hashes to the node ID of this entry.
                 ///
                 /// On a mismatch, entries flagged ELLIPSIS are reported as
                 /// unsupported and any other entry as corrupted.
                 fn check_data(
                     &self,
                     data: Cow<'revlog, [u8]>,
                 ) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let hash_matches = self.revlog.check_hash(
                         self.p1,
                         self.p2,
                         self.hash.as_bytes(),
                         &data,
                     );
                     if hash_matches {
                         return Ok(data);
                     }
                     let is_ellipsis = (self.flags & REVISION_FLAG_ELLIPSIS) != 0;
                     if is_ellipsis {
                         Err(HgError::unsupported(
                             "ellipsis revisions are not supported by rhg",
                         ))
                     } else {
                         Err(corrupted(format!(
                             "hash check failed for revision {}",
                             self.rev
                         )))
                     }
                 }
                 /// The checked data of this entry.
                 ///
                 /// The raw data is computed first. It is returned unchecked for
                 /// `NULL_REVISION`, refused with `CensoredNodeError` for censored
                 /// entries and verified against the node ID in all other cases.
                 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let raw = self.rawdata()?;
                     if self.rev == NULL_REVISION {
                         Ok(raw)
                     } else if self.is_censored() {
                         Err(HgError::CensoredNodeError)
                     } else {
                         self.check_data(raw)
                     }
                 }
                 /// Decode the chunk stored for this entry, according to its first
                 /// byte.
                 /// The result may be a delta. (See `is_delta`.)
                 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     let header = match self.bytes.first() {
                         Some(&byte) => byte,
                         // Nothing stored at all
                         None => return Ok(Cow::Borrowed(&[])),
                     };
                     let chunk: Cow<'revlog, [u8]> = match header {
                         // The whole entry is revision data, header included.
                         b'\0' => Cow::Borrowed(self.bytes),
                         // The header is followed by raw revision data.
                         b'u' => Cow::Borrowed(&self.bytes[1..]),
                         // zlib (RFC 1950) data.
                         b'x' => Cow::Owned(self.uncompressed_zlib_data()?),
                         // zstd data.
                         b'\x28' => Cow::Owned(self.uncompressed_zstd_data()?),
                         // A proper new format should have had a repo/store requirement.
                         format_type => {
                             return Err(corrupted(format!(
                                 "unknown compression header '{}'",
                                 format_type
                             )))
                         }
                     };
                     Ok(chunk)
                 }
                 /// Inflate the zlib-compressed bytes of this entry.
                 ///
                 /// A delta is read until the end of the stream, whereas a snapshot
                 /// must provide exactly the recorded uncompressed length. Read
                 /// failures are reported as corruption.
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
                     let mut decoder = ZlibDecoder::new(self.bytes);
                     let inflated = if self.is_delta() {
                         let mut out = Vec::with_capacity(self.compressed_len as usize);
                         decoder
                             .read_to_end(&mut out)
                             .map_err(|e| corrupted(e.to_string()))?;
                         out
                     } else {
                         let expected_len = self.uncompressed_len.max(0) as usize;
                         let mut out = vec![0; expected_len];
                         decoder
                             .read_exact(&mut out)
                             .map_err(|e| corrupted(e.to_string()))?;
                         out
                     };
                     Ok(inflated)
                 }
                 /// Decompress the zstd-compressed bytes of this entry.
                 ///
                 /// A snapshot must decompress to exactly the recorded uncompressed
                 /// length. A delta gets a streaming fallback when the bulk
                 /// decompression fails.
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
                     let cap = self.uncompressed_len.max(0) as usize;
                     let mut buf = Vec::with_capacity(cap);
                     if !self.is_delta() {
                         let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         return if len == self.uncompressed_len as usize {
                             Ok(buf)
                         } else {
                             Err(corrupted("uncompressed length does not match"))
                         };
                     }
                     // For a delta, [cap] is usually more than needed: it is the
                     // length of the delta-decoded data whereas only the delta gets
                     // decompressed here. Hence the [shrink_to_fit], to avoid holding
                     // on to a large chunk of memory. A fallback is needed as well,
                     // because the delta can be longer than the original data
                     // (surprisingly, this does happen in practice).
                     if zstd_decompress_to_buffer(self.bytes, &mut buf).is_ok() {
                         buf.shrink_to_fit();
                     } else {
                         buf.clear();
                         zstd::stream::copy_decode(self.bytes, &mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                     }
                     Ok(buf)
                 }
                 /// Tell if the entry is a snapshot or a delta
                 /// (influences on decompression).
                 ///
                 /// An entry is a delta if it has a recorded base.
                 fn is_delta(&self) -> bool {
                     self.base_rev_or_base_of_delta_chain.is_some()
                 }
             }
             /// Compute the SHA-1 of a revision from its data and the hashes of its
             /// parents.
             ///
             /// The parent hashes are fed to the hasher in ascending order,
             /// followed by the data.
             fn hash(
                 data: &[u8],
                 p1_hash: &[u8],
                 p2_hash: &[u8],
             ) -> [u8; NODE_BYTES_LENGTH] {
                 let (lesser, greater) = if p1_hash > p2_hash {
                     (p2_hash, p1_hash)
                 } else {
                     (p1_hash, p2_hash)
                 };
                 let mut hasher = Sha1::new();
                 hasher.update(lesser);
                 hasher.update(greater);
                 hasher.update(data);
                 *hasher.finalize().as_ref()
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
                 use itertools::Itertools;
                 /// An empty index file gives an empty revlog, in which only the null
                 /// revision can be looked up.
                 #[test]
                 fn test_empty() {
                     let tmp_dir = tempfile::tempdir().unwrap();
                     let vfs = Vfs {
                         base: tmp_dir.path(),
                     };
                     let index_file = tmp_dir.path().join("foo.i");
                     std::fs::write(index_file, b"").unwrap();
                     let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
                     assert!(revlog.is_empty());
                     assert_eq!(revlog.len(), 0);
                     assert!(revlog.get_entry(0).is_err());
                     assert!(!revlog.has_rev(0));
                     let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
                     assert_eq!(null_rev, NULL_REVISION);
                     let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
                     assert_eq!(null_entry.revision(), NULL_REVISION);
                     let null_data = null_entry.data().unwrap();
                     assert!(null_data.is_empty());
                 }
                 #[test]
                 fn test_inline() {
                     let temp = tempfile::tempdir().unwrap();
                     let vfs = Vfs { base: temp.path() };
                     let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
                         .unwrap();
                     let entry0_bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .with_inline(true)
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node0)
                         .build();
                     let entry1_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node1)
                         .build();
                     let entry2_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_p1(0)
                         .with_p2(1)
                         .with_node(node2)
                         .build();
                     let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
                         .into_iter()
                         .flatten()
                         .collect_vec();
                     std::fs::write(temp.path().join("foo.i"), contents).unwrap();
                     let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
                     let entry0 = revlog.get_entry(0).ok().unwrap();
                     assert_eq!(entry0.revision(), 0);
                     assert_eq!(*entry0.node(), node0);
                     assert!(!entry0.has_p1());
                     assert_eq!(entry0.p1(), None);
                     assert_eq!(entry0.p2(), None);
                     let p1_entry = entry0.p1_entry().unwrap();
                     assert!(p1_entry.is_none());
                     let p2_entry = entry0.p2_entry().unwrap();
                     assert!(p2_entry.is_none());
                     let entry1 = revlog.get_entry(1).ok().unwrap();
                     assert_eq!(entry1.revision(), 1);
                     assert_eq!(*entry1.node(), node1);
                     assert!(!entry1.has_p1());
                     assert_eq!(entry1.p1(), None);
                     assert_eq!(entry1.p2(), None);
                     let p1_entry = entry1.p1_entry().unwrap();
                     assert!(p1_entry.is_none());
                     let p2_entry = entry1.p2_entry().unwrap();
                     assert!(p2_entry.is_none());
                     let entry2 = revlog.get_entry(2).ok().unwrap();
                     assert_eq!(entry2.revision(), 2);
                     assert_eq!(*entry2.node(), node2);
                     assert!(entry2.has_p1());
                     assert_eq!(entry2.p1(), Some(0));
                     assert_eq!(entry2.p2(), Some(1));
                     let p1_entry = entry2.p1_entry().unwrap();
                     assert!(p1_entry.is_some());
                     assert_eq!(p1_entry.unwrap().revision(), 0);
                     let p2_entry = entry2.p2_entry().unwrap();
                     assert!(p2_entry.is_some());
                     assert_eq!(p2_entry.unwrap().revision(), 1);
                 }
                 #[test]
                 fn test_nodemap() {
                     let temp = tempfile::tempdir().unwrap();
                     let vfs = Vfs { base: temp.path() };
                     // building a revlog with a forced Node starting with zeros
                     // This is a corruption, but it does not preclude using the nodemap
                     // if we don't try and access the data
                     let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let entry0_bytes = IndexEntryBuilder::new()
                         .is_first(true)
                         .with_version(1)
                         .with_inline(true)
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node0)
                         .build();
                     let entry1_bytes = IndexEntryBuilder::new()
                         .with_offset(INDEX_ENTRY_SIZE)
                         .with_node(node1)
                         .build();
                     let contents = vec![entry0_bytes, entry1_bytes]
                         .into_iter()
                         .flatten()
                         .collect_vec();
                     std::fs::write(temp.path().join("foo.i"), contents).unwrap();
                     let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
                     // accessing the data shows the corruption
                     revlog.get_entry(0).unwrap().data().unwrap_err();
                     assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
                     assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
                     assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
                     assert_eq!(
                         revlog
                             .rev_from_node(NodePrefix::from_hex("000").unwrap())
                             .unwrap(),
                         -1
                     );
                     assert_eq!(
                         revlog
                             .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                             .unwrap(),
                     );
                     // RevlogError does not implement PartialEq
                     // (ultimately because io::Error does not)
                     match revlog
                         .rev_from_node(NodePrefix::from_hex("00").unwrap())
                         .expect_err("Expected to give AmbiguousPrefix error")
                     {
                         RevlogError::AmbiguousPrefix => (),
                         e => {
                             panic!("Got another error than AmbiguousPrefix: {:?}", e);
                         }
                     };
                 }
             }