rust-revlog: rename `start` to `data_start`...
Raphaël Gomès
r53193:e2319309 default

@@ -1,1351 +1,1351 @@
 //! A layer of lower-level revlog functionality to encapsulate most of the
 //! IO work and expensive operations.
 use std::{
     borrow::Cow,
     cell::RefCell,
     io::{ErrorKind, Seek, SeekFrom, Write},
     ops::Deref,
     path::PathBuf,
     sync::{Arc, Mutex},
 };
 
 use schnellru::{ByMemoryUsage, LruMap};
 use sha1::{Digest, Sha1};
 
 use crate::{
     errors::{HgError, IoResultExt},
     exit_codes,
     transaction::Transaction,
     vfs::Vfs,
 };
 
 use super::{
     compression::{
         uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
         ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
     },
     file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
     index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
     node::{NODE_BYTES_LENGTH, NULL_NODE},
     options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
     BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
     UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
 };
 
 /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
 /// boundary to incrementally rewrite higher-level revlog functionality in
 /// Rust.
 pub struct InnerRevlog {
     /// When index and data are not interleaved: bytes of the revlog index.
     /// When index and data are interleaved (inline revlog): bytes of the
     /// revlog index and data.
     pub index: Index,
     /// The store vfs that is used to interact with the filesystem
     vfs: Box<dyn Vfs>,
     /// The index file path, relative to the vfs root
     pub index_file: PathBuf,
     /// The data file path, relative to the vfs root (same as `index_file`
     /// if inline)
     data_file: PathBuf,
     /// Data config that applies to this revlog
     data_config: RevlogDataConfig,
     /// Delta config that applies to this revlog
     delta_config: RevlogDeltaConfig,
     /// Feature config that applies to this revlog
     feature_config: RevlogFeatureConfig,
     /// A view into this revlog's data file
     segment_file: RandomAccessFile,
     /// A cache of uncompressed chunks that have previously been restored.
     /// Its eviction policy is defined in [`Self::new`].
     uncompressed_chunk_cache: Option<UncompressedChunkCache>,
     /// Used to keep track of the actual target during diverted writes
     /// for the changelog
     original_index_file: Option<PathBuf>,
     /// Write handles to the index and data files
     /// XXX why duplicate from `index` and `segment_file`?
     writing_handles: Option<WriteHandles>,
     /// See [`DelayedBuffer`].
     delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
     /// Whether this revlog is inline. XXX why duplicate from `index`?
     pub inline: bool,
     /// A cache of the last revision, which is usually accessed multiple
     /// times.
     pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
 }
 
 impl InnerRevlog {
     pub fn new(
         vfs: Box<dyn Vfs>,
         index: Index,
         index_file: PathBuf,
         data_file: PathBuf,
         data_config: RevlogDataConfig,
         delta_config: RevlogDeltaConfig,
         feature_config: RevlogFeatureConfig,
     ) -> Self {
         assert!(index_file.is_relative());
         assert!(data_file.is_relative());
         let segment_file = RandomAccessFile::new(
             dyn_clone::clone_box(&*vfs),
             if index.is_inline() {
                 index_file.to_owned()
             } else {
                 data_file.to_owned()
             },
         );
 
         let uncompressed_chunk_cache =
             data_config.uncompressed_cache_factor.map(
                 // Arbitrary initial value
                 // TODO check if using a hasher specific to integers is useful
                 |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
             );
 
         let inline = index.is_inline();
         Self {
             index,
             vfs,
             index_file,
             data_file,
             data_config,
             delta_config,
             feature_config,
             segment_file,
             uncompressed_chunk_cache,
             original_index_file: None,
             writing_handles: None,
             delayed_buffer: None,
             inline,
             last_revision_cache: Mutex::new(None),
         }
     }
 
     /// Return number of entries of the revlog index
     pub fn len(&self) -> usize {
         self.index.len()
     }
 
     /// Return `true` if this revlog has no entries
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
     /// Return whether this revlog is inline (mixed index and data)
     pub fn is_inline(&self) -> bool {
         self.inline
     }
 
     /// Clear all caches from this revlog
     pub fn clear_cache(&mut self) {
         assert!(!self.is_delaying());
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             // We don't clear the allocation here because it's probably faster.
             // We could change our minds later if this ends up being a problem
             // with regards to memory consumption.
             cache.borrow_mut().clear();
         }
     }
 
     /// Return an entry for the null revision
     pub fn make_null_entry(&self) -> RevlogEntry {
         RevlogEntry {
             revlog: self,
             rev: NULL_REVISION,
             uncompressed_len: 0,
             p1: NULL_REVISION,
             p2: NULL_REVISION,
             flags: NULL_REVLOG_ENTRY_FLAGS,
             hash: NULL_NODE,
         }
     }
 
     /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
     pub fn get_entry(
         &self,
         rev: Revision,
     ) -> Result<RevlogEntry, RevlogError> {
         if rev == NULL_REVISION {
             return Ok(self.make_null_entry());
         }
         let index_entry = self
             .index
             .get_entry(rev)
             .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
         let p1 =
             self.index.check_revision(index_entry.p1()).ok_or_else(|| {
                 RevlogError::corrupted(format!(
                     "p1 for rev {} is invalid",
                     rev
                 ))
             })?;
         let p2 =
             self.index.check_revision(index_entry.p2()).ok_or_else(|| {
                 RevlogError::corrupted(format!(
                     "p2 for rev {} is invalid",
                     rev
                 ))
             })?;
         let entry = RevlogEntry {
             revlog: self,
             rev,
             uncompressed_len: index_entry.uncompressed_len(),
             p1,
             p2,
             flags: index_entry.flags(),
             hash: *index_entry.hash(),
         };
         Ok(entry)
     }
 
     /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
     /// returns a [`RevlogError`].
     pub fn get_entry_for_unchecked_rev(
         &self,
         rev: UncheckedRevision,
     ) -> Result<RevlogEntry, RevlogError> {
         if rev == NULL_REVISION.into() {
             return Ok(self.make_null_entry());
         }
         let rev = self.index.check_revision(rev).ok_or_else(|| {
             RevlogError::corrupted(format!("rev {} is invalid", rev))
         })?;
         self.get_entry(rev)
     }
 
     /// Is the revlog currently delaying the visibility of written data?
     ///
     /// The delaying mechanism can be either in-memory or written on disk in a
     /// side-file.
     pub fn is_delaying(&self) -> bool {
         self.delayed_buffer.is_some() || self.original_index_file.is_some()
     }
 
     /// The offset of the data chunk for this revision
     #[inline(always)]
-    pub fn start(&self, rev: Revision) -> usize {
+    pub fn data_start(&self, rev: Revision) -> usize {
         self.index.start(
             rev,
             &self
                 .index
                 .get_entry(rev)
                 .unwrap_or_else(|| self.index.make_null_entry()),
         )
     }
 
     /// The length of the data chunk for this revision
     #[inline(always)]
     pub fn data_compressed_length(&self, rev: Revision) -> usize {
         self.index
             .get_entry(rev)
             .unwrap_or_else(|| self.index.make_null_entry())
             .compressed_len() as usize
     }
 
     /// The end of the data chunk for this revision
     #[inline(always)]
     pub fn end(&self, rev: Revision) -> usize {
-        self.start(rev) + self.data_compressed_length(rev)
+        self.data_start(rev) + self.data_compressed_length(rev)
     }
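
As an illustration of the renamed API (a sketch, not part of the patch): a
revision's compressed chunk occupies the half-open byte range
`[data_start(rev), end(rev))` of the data (or inline index) file, so the two
methods above are all a caller needs to locate it. The `chunk_range` helper
below is hypothetical:

    // Hypothetical helper: byte range of a revision's compressed chunk.
    fn chunk_range(
        revlog: &InnerRevlog,
        rev: Revision,
    ) -> std::ops::Range<usize> {
        // end(rev) == data_start(rev) + data_compressed_length(rev)
        revlog.data_start(rev)..revlog.end(rev)
    }
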
 
     /// Return the delta parent of the given revision
     pub fn delta_parent(&self, rev: Revision) -> Revision {
         let base = self
             .index
             .get_entry(rev)
             .unwrap()
             .base_revision_or_base_of_delta_chain();
         if base.0 == rev.0 {
             NULL_REVISION
         } else if self.delta_config.general_delta {
             Revision(base.0)
         } else {
             Revision(rev.0 - 1)
         }
     }
 
     /// Return whether `rev` points to a snapshot revision (i.e. does not have
     /// a delta base).
     pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
         if !self.delta_config.sparse_revlog {
             return Ok(self.delta_parent(rev) == NULL_REVISION);
         }
         self.index.is_snapshot_unchecked(rev)
     }
 
     /// Return the delta chain for `rev` according to this revlog's config.
     /// See [`Index::delta_chain`] for more information.
     pub fn delta_chain(
         &self,
         rev: Revision,
         stop_rev: Option<Revision>,
     ) -> Result<(Vec<Revision>, bool), HgError> {
         self.index.delta_chain(
             rev,
             stop_rev,
             self.delta_config.general_delta.into(),
         )
     }
 
     fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
         // TODO cache the compressor?
         Ok(match self.feature_config.compression_engine {
             CompressionConfig::Zlib { level } => {
                 Box::new(ZlibCompressor::new(level))
             }
             CompressionConfig::Zstd { level, threads } => {
                 Box::new(ZstdCompressor::new(level, threads))
             }
             CompressionConfig::None => Box::new(NoneCompressor),
         })
     }
 
     /// Generate a possibly-compressed representation of data.
     /// Returns `None` if the data was not compressed.
     pub fn compress<'data>(
         &self,
         data: &'data [u8],
     ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
         if data.is_empty() {
             return Ok(Some(data.into()));
         }
         let res = self.compressor()?.compress(data)?;
         if let Some(compressed) = res {
             // The revlog compressor added the header in the returned data.
             return Ok(Some(compressed.into()));
         }
 
         if data[0] == b'\0' {
             return Ok(Some(data.into()));
         }
         Ok(None)
     }
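
A hedged sketch of what a caller does when `compress` returns `None` (the
convention is visible in `decompress` below, which strips a literal `u`
marker; the surrounding write path is not shown in this hunk):

    // Assumed helper: bytes to store for `text`, adding the `u` marker
    // when `compress` declined to compress, so that `decompress` can tell
    // raw data apart from zlib/zstd headers.
    fn storable(
        revlog: &InnerRevlog,
        text: &[u8],
    ) -> Result<Vec<u8>, RevlogError> {
        Ok(match revlog.compress(text)? {
            Some(compressed) => compressed.into_owned(),
            None => {
                let mut raw = Vec::with_capacity(text.len() + 1);
                raw.push(b'u'); // uncompressed-with-marker header
                raw.extend_from_slice(text);
                raw
            }
        })
    }
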
 
     /// Decompress a revlog chunk.
     ///
     /// The chunk is expected to begin with a header identifying the
     /// format type so it can be routed to an appropriate decompressor.
     pub fn decompress<'a>(
         &'a self,
         data: &'a [u8],
     ) -> Result<Cow<[u8]>, RevlogError> {
         if data.is_empty() {
             return Ok(data.into());
         }
 
         // Revlogs are read much more frequently than they are written and many
         // chunks only take microseconds to decompress, so performance is
         // important here.
 
         let header = data[0];
         match header {
             // Settings don't matter as they only affect compression
             ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
             // Settings don't matter as they only affect compression
             ZSTD_BYTE => {
                 Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
             }
             b'\0' => Ok(data.into()),
             b'u' => Ok((&data[1..]).into()),
             other => Err(HgError::UnsupportedFeature(format!(
                 "unknown compression header '{}'",
                 other
             ))
             .into()),
         }
     }
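
Putting the two halves together: `ZLIB_BYTE` and `ZSTD_BYTE` are the natural
first bytes of the respective compressed streams, `b'u'` marks raw data, and
`b'\0'` (e.g. an empty diff) is returned as-is. A small round-trip sketch,
assumed rather than taken from this file, and reusing the hypothetical
`storable` helper above:

    // Round-trip property: whatever was stored, `decompress` must give
    // the original text back.
    fn roundtrip(
        revlog: &InnerRevlog,
        text: &[u8],
    ) -> Result<bool, RevlogError> {
        let stored = storable(revlog, text)?;
        Ok(revlog.decompress(&stored)?.as_ref() == text)
    }
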
 
     /// Obtain a segment of raw data corresponding to a range of revisions.
     ///
     /// Requests for data may be satisfied by a cache.
     ///
     /// Returns a 2-tuple of (offset, data) for the requested range of
     /// revisions. Offset is the integer offset from the beginning of the
     /// revlog and data is a slice of the raw byte data.
     ///
     /// Callers will need to call `self.data_start(rev)` and
     /// `self.data_compressed_length(rev)` to determine where each
     /// revision's data begins and ends.
     pub fn get_segment_for_revs(
         &self,
         start_rev: Revision,
         end_rev: Revision,
     ) -> Result<(usize, Vec<u8>), HgError> {
         let start = if start_rev == NULL_REVISION {
             0
         } else {
             let start_entry = self
                 .index
                 .get_entry(start_rev)
                 .expect("null revision segment");
             self.index.start(start_rev, &start_entry)
         };
         let end_entry = self
             .index
             .get_entry(end_rev)
             .expect("null revision segment");
         let end = self.index.start(end_rev, &end_entry)
             + self.data_compressed_length(end_rev);
 
         let length = end - start;
 
         // XXX should we use mmap instead of doing this for platforms that
         // support madvise/populate?
         Ok((start, self.segment_file.read_chunk(start, length)?))
     }
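
A usage sketch for multi-revision reads (assumed caller code, mirroring the
loop in `chunks` below): the returned `offset` rebases the absolute file
offsets produced by `data_start` onto the fetched segment.

    // Assumed helper: carve each revision's compressed chunk out of one
    // contiguous segment read.
    fn chunks_in_segment(
        revlog: &InnerRevlog,
        revs: &[Revision],
    ) -> Result<Vec<Vec<u8>>, HgError> {
        let (offset, data) =
            revlog.get_segment_for_revs(revs[0], revs[revs.len() - 1])?;
        Ok(revs
            .iter()
            .map(|rev| {
                let start = revlog.data_start(*rev) - offset;
                let len = revlog.data_compressed_length(*rev);
                data[start..start + len].to_vec()
            })
            .collect())
    }
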
 
     /// Return the uncompressed raw data for `rev`
     pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             if let Some(chunk) = cache.borrow_mut().get(&rev) {
                 return Ok(chunk.clone());
             }
         }
         // TODO revlogv2 should check the compression mode
         let data = self.get_segment_for_revs(rev, rev)?.1;
         let uncompressed = self.decompress(&data).map_err(|e| {
             HgError::abort(
                 format!("revlog decompression error: {}", e),
                 exit_codes::ABORT,
                 None,
             )
         })?;
         let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             cache.borrow_mut().insert(rev, uncompressed.clone());
         }
         Ok(uncompressed)
     }
 
     /// Execute `func` within a read context for the data file, meaning that
     /// the read handle will be taken and discarded after the operation.
     pub fn with_read<R>(
         &self,
         func: impl FnOnce() -> Result<R, RevlogError>,
     ) -> Result<R, RevlogError> {
         self.enter_reading_context()?;
         let res = func();
         self.exit_reading_context();
         res.map_err(Into::into)
     }
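
Assumed usage of the read context: one read handle is acquired for the whole
closure rather than one per chunk access, and the closure's `HgError` is
converted into `RevlogError` by `?` (the same conversion `decompress` relies
on above).

    use std::sync::Arc;

    fn read_one(
        revlog: &InnerRevlog,
        rev: Revision,
    ) -> Result<Arc<[u8]>, RevlogError> {
        revlog.with_read(|| Ok(revlog.chunk_for_rev(rev)?))
    }
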
 
     /// `pub` only for use in hg-cpython
     #[doc(hidden)]
     pub fn enter_reading_context(&self) -> Result<(), HgError> {
         if self.is_empty() {
             // Nothing to be read
             return Ok(());
         }
         if self.delayed_buffer.is_some() && self.is_inline() {
             return Err(HgError::abort(
                 "revlog with delayed write should not be inline",
                 exit_codes::ABORT,
                 None,
             ));
         }
         self.segment_file.get_read_handle()?;
         Ok(())
     }
 
     /// `pub` only for use in hg-cpython
     #[doc(hidden)]
     pub fn exit_reading_context(&self) {
         self.segment_file.exit_reading_context()
     }
 
     /// Fill the buffer returned by `get_buffer` with the possibly un-validated
     /// raw text for a revision. It can be already validated if it comes
     /// from the cache.
     pub fn raw_text<G, T>(
         &self,
         rev: Revision,
         get_buffer: G,
     ) -> Result<(), RevlogError>
     where
         G: FnOnce(
             usize,
             &mut dyn FnMut(
                 &mut dyn RevisionBuffer<Target = T>,
             ) -> Result<(), RevlogError>,
         ) -> Result<(), RevlogError>,
     {
         let entry = &self.get_entry(rev)?;
         let raw_size = entry.uncompressed_len();
         let mut mutex_guard = self
             .last_revision_cache
             .lock()
             .expect("lock should not be held");
         let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
             Some((*rev, data.deref().as_ref()))
         } else {
             None
         };
         if let Some(cache) = &self.uncompressed_chunk_cache {
             let cache = &mut cache.borrow_mut();
             if let Some(size) = raw_size {
                 // Dynamically update the uncompressed_chunk_cache size to the
                 // largest revision we've seen in this revlog.
                 // Do it *before* restoration in case the current revision
                 // is the largest.
                 let factor = self
                     .data_config
                     .uncompressed_cache_factor
                     .expect("cache should not exist without factor");
                 let candidate_size = (size as f64 * factor) as usize;
                 let limiter_mut = cache.limiter_mut();
                 if candidate_size > limiter_mut.max_memory_usage() {
                     std::mem::swap(
                         limiter_mut,
                         &mut ByMemoryUsage::new(candidate_size),
                     );
                 }
             }
         }
         entry.rawdata(cached_rev, get_buffer)?;
         // drop cache to save memory, the caller is expected to update
         // the revision cache after validating the text
         mutex_guard.take();
         Ok(())
     }
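
The limiter swap above is the whole resizing mechanism: the memory budget
only ever grows, to fit a configured factor of the largest revision seen so
far. A standalone sketch using the same `schnellru` calls:

    use schnellru::{ByMemoryUsage, LruMap};

    // Grow (never shrink) the memory budget of an LRU map.
    fn grow_budget(
        cache: &mut LruMap<i32, Vec<u8>, ByMemoryUsage>,
        candidate_size: usize,
    ) {
        let limiter = cache.limiter_mut();
        if candidate_size > limiter.max_memory_usage() {
            std::mem::swap(limiter, &mut ByMemoryUsage::new(candidate_size));
        }
    }
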
 
     /// Only `pub` for `hg-cpython`.
     /// Obtain decompressed raw data for the specified revisions that are
     /// assumed to be in ascending order.
     ///
     /// Returns a list with decompressed data for each requested revision.
     #[doc(hidden)]
     pub fn chunks(
         &self,
         revs: Vec<Revision>,
         target_size: Option<u64>,
     ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
         if revs.is_empty() {
             return Ok(vec![]);
         }
         let mut fetched_revs = vec![];
         let mut chunks = Vec::with_capacity(revs.len());
 
         match self.uncompressed_chunk_cache.as_ref() {
             Some(cache) => {
                 let mut cache = cache.borrow_mut();
                 for rev in revs.iter() {
                     match cache.get(rev) {
                         Some(hit) => chunks.push((*rev, hit.to_owned())),
                         None => fetched_revs.push(*rev),
                     }
                 }
             }
             None => fetched_revs = revs,
         }
 
         let already_cached = chunks.len();
 
         let sliced_chunks = if fetched_revs.is_empty() {
             vec![]
         } else if !self.data_config.with_sparse_read || self.is_inline() {
             vec![fetched_revs]
         } else {
             self.slice_chunk(&fetched_revs, target_size)?
         };
 
         self.with_read(|| {
             for revs_chunk in sliced_chunks {
                 let first_rev = revs_chunk[0];
                 // Skip trailing revisions with empty diff
                 let last_rev_idx = revs_chunk
                     .iter()
                     .rposition(|r| self.data_compressed_length(*r) != 0)
                     .unwrap_or(revs_chunk.len() - 1);
 
                 let last_rev = revs_chunk[last_rev_idx];
 
                 let (offset, data) =
                     self.get_segment_for_revs(first_rev, last_rev)?;
 
                 let revs_chunk = &revs_chunk[..=last_rev_idx];
 
                 for rev in revs_chunk {
-                    let chunk_start = self.start(*rev);
+                    let chunk_start = self.data_start(*rev);
                     let chunk_length = self.data_compressed_length(*rev);
                     // TODO revlogv2 should check the compression mode
                     let bytes = &data[chunk_start - offset..][..chunk_length];
                     let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
                         // If we're using `zstd`, we want to try a more
                         // specialized decompression
                         let entry = self.index.get_entry(*rev).unwrap();
                         let is_delta = entry
                             .base_revision_or_base_of_delta_chain()
                             != (*rev).into();
                         let uncompressed = uncompressed_zstd_data(
                             bytes,
                             is_delta,
                             entry.uncompressed_len(),
                         )?;
                         Cow::Owned(uncompressed)
                     } else {
                         // Otherwise just fallback to generic decompression.
                         self.decompress(bytes)?
                     };
 
                     chunks.push((*rev, chunk.into()));
                 }
             }
             Ok(())
         })?;
 
         if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
             let mut cache = cache.borrow_mut();
             for (rev, chunk) in chunks.iter().skip(already_cached) {
                 cache.insert(*rev, chunk.clone());
             }
         }
         // Use stable sort here since it's *mostly* sorted
         chunks.sort_by(|a, b| a.0.cmp(&b.0));
         Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
     }
 
     /// Slice revs to reduce the amount of unrelated data to be read from disk.
     ///
     /// ``revs`` is sliced into groups that should be read in one time.
     /// Assume that revs are sorted.
     ///
     /// The initial chunk is sliced until the overall density
     /// (payload/chunks-span ratio) is above
     /// `revlog.data_config.sr_density_threshold`.
     /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
     ///
     /// If `target_size` is set, no chunk larger than `target_size`
     /// will be returned.
     /// For consistency with other slicing choices, this limit won't go lower
     /// than `revlog.data_config.sr_min_gap_size`.
     ///
     /// If individual revision chunks are larger than this limit, they will
     /// still be raised individually.
     pub fn slice_chunk(
         &self,
         revs: &[Revision],
         target_size: Option<u64>,
     ) -> Result<Vec<Vec<Revision>>, RevlogError> {
         let target_size =
             target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
 
         let target_density = self.data_config.sr_density_threshold;
         let min_gap_size = self.data_config.sr_min_gap_size as usize;
         let to_density = self.index.slice_chunk_to_density(
             revs,
             target_density,
             min_gap_size,
         );
 
         let mut sliced = vec![];
 
         for chunk in to_density {
             sliced.extend(
                 self.slice_chunk_to_size(&chunk, target_size)?
                     .into_iter()
                     .map(ToOwned::to_owned),
             );
         }
 
         Ok(sliced)
     }
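
A hedged usage sketch tying slicing back to reading (this is what `chunks`
above does when sparse-read is enabled): each returned group is dense enough
to be fetched with a single contiguous segment read.

    fn read_sparse(
        revlog: &InnerRevlog,
        revs: &[Revision],
        target_size: Option<u64>,
    ) -> Result<(), RevlogError> {
        for group in revlog.slice_chunk(revs, target_size)? {
            let (_offset, _data) = revlog
                .get_segment_for_revs(group[0], group[group.len() - 1])?;
            // ... carve and decompress individual chunks as in `chunks` ...
        }
        Ok(())
    }
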
 
     /// Slice revs to match the target size
     ///
     /// This is intended to be used on chunks that density slicing selected,
     /// but that are still too large compared to the read guarantee of revlogs.
     /// This might happen when the "minimal gap size" interrupted the slicing
     /// or when chains are built in a way that create large blocks next to
     /// each other.
     fn slice_chunk_to_size<'a>(
         &self,
         revs: &'a [Revision],
         target_size: Option<u64>,
     ) -> Result<Vec<&'a [Revision]>, RevlogError> {
-        let mut start_data = self.start(revs[0]);
+        let mut start_data = self.data_start(revs[0]);
         let end_data = self.end(revs[revs.len() - 1]);
         let full_span = end_data - start_data;
 
         let nothing_to_do = target_size
             .map(|size| full_span <= size as usize)
             .unwrap_or(true);
 
         if nothing_to_do {
             return Ok(vec![revs]);
         }
         let target_size = target_size.expect("target_size is set") as usize;
 
         let mut start_rev_idx = 0;
         let mut end_rev_idx = 1;
         let mut chunks = vec![];
 
         for (idx, rev) in revs.iter().enumerate().skip(1) {
             let span = self.end(*rev) - start_data;
             let is_snapshot = self.is_snapshot(*rev)?;
             if span <= target_size && is_snapshot {
                 end_rev_idx = idx + 1;
             } else {
                 let chunk =
                     self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
                 if !chunk.is_empty() {
                     chunks.push(chunk);
                 }
                 start_rev_idx = idx;
-                start_data = self.start(*rev);
+                start_data = self.data_start(*rev);
                 end_rev_idx = idx + 1;
             }
             if !is_snapshot {
                 break;
             }
         }
 
         // For the others, we use binary slicing to quickly converge towards
         // valid chunks (otherwise, we might end up looking for the start/end
         // of many revisions). This logic is not looking for the perfect
         // slicing point, it quickly converges towards valid chunks.
         let number_of_items = revs.len();
 
         while (end_data - start_data) > target_size {
             end_rev_idx = number_of_items;
             if number_of_items - start_rev_idx <= 1 {
                 // Protect against individual chunks larger than the limit
                 break;
             }
             let mut local_end_data = self.end(revs[end_rev_idx - 1]);
             let mut span = local_end_data - start_data;
             while span > target_size {
                 if end_rev_idx - start_rev_idx <= 1 {
                     // Protect against individual chunks larger than the limit
                     break;
                 }
                 end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
                 local_end_data = self.end(revs[end_rev_idx - 1]);
                 span = local_end_data - start_data;
             }
             let chunk =
                 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
             if !chunk.is_empty() {
                 chunks.push(chunk);
             }
             start_rev_idx = end_rev_idx;
-            start_data = self.start(revs[start_rev_idx]);
+            start_data = self.data_start(revs[start_rev_idx]);
         }
 
         let chunk = self.trim_chunk(revs, start_rev_idx, None);
         if !chunk.is_empty() {
             chunks.push(chunk);
         }
 
         Ok(chunks)
     }
 
     /// Returns `revs[startidx..endidx]` without empty trailing revs
     fn trim_chunk<'a>(
         &self,
         revs: &'a [Revision],
         start_rev_idx: usize,
         end_rev_idx: Option<usize>,
     ) -> &'a [Revision] {
         let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
 
         // If we have a non-empty delta candidate, there is nothing to trim
         if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
             // Trim empty revs at the end, except the very first rev of a chain
             while end_rev_idx > 1
                 && end_rev_idx > start_rev_idx
                 && self.data_compressed_length(revs[end_rev_idx - 1]) == 0
             {
                 end_rev_idx -= 1
             }
         }
 
         &revs[start_rev_idx..end_rev_idx]
     }
 
     /// Check the hash of some given data against the recorded hash.
     pub fn check_hash(
         &self,
         p1: Revision,
         p2: Revision,
         expected: &[u8],
         data: &[u8],
     ) -> bool {
         let e1 = self.index.get_entry(p1);
         let h1 = match e1 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE,
         };
         let e2 = self.index.get_entry(p2);
         let h2 = match e2 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE,
         };
 
         hash(data, h1.as_bytes(), h2.as_bytes()) == expected
     }
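
The `hash` helper called here is defined outside this hunk; Mercurial's node
hash is SHA-1 over the two parent nodes in bytewise-sorted order followed by
the revision text. A sketch consistent with that definition (assumed, the
real helper lives elsewhere in this module):

    use sha1::{Digest, Sha1};

    // SHA-1 of sorted parents then data, as used for revlog node ids.
    fn node_hash(data: &[u8], p1: &[u8], p2: &[u8]) -> [u8; 20] {
        let mut hasher = Sha1::new();
        let (a, b) = if p1 < p2 { (p1, p2) } else { (p2, p1) };
        hasher.update(a);
        hasher.update(b);
        hasher.update(data);
        hasher.finalize().into()
    }
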
784
784
785 /// Returns whether we are currently in a [`Self::with_write`] context
785 /// Returns whether we are currently in a [`Self::with_write`] context
786 pub fn is_writing(&self) -> bool {
786 pub fn is_writing(&self) -> bool {
787 self.writing_handles.is_some()
787 self.writing_handles.is_some()
788 }
788 }
789
789
790 /// Open the revlog files for writing
790 /// Open the revlog files for writing
791 ///
791 ///
792 /// Adding content to a revlog should be done within this context.
792 /// Adding content to a revlog should be done within this context.
793 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
793 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
794 pub fn with_write<R>(
794 pub fn with_write<R>(
795 &mut self,
795 &mut self,
796 transaction: &mut impl Transaction,
796 transaction: &mut impl Transaction,
797 data_end: Option<usize>,
797 data_end: Option<usize>,
798 func: impl FnOnce() -> R,
798 func: impl FnOnce() -> R,
799 ) -> Result<R, HgError> {
799 ) -> Result<R, HgError> {
800 if self.is_writing() {
800 if self.is_writing() {
801 return Ok(func());
801 return Ok(func());
802 }
802 }
803 self.enter_writing_context(data_end, transaction)
803 self.enter_writing_context(data_end, transaction)
804 .inspect_err(|_| {
804 .inspect_err(|_| {
805 self.exit_writing_context();
805 self.exit_writing_context();
806 })?;
806 })?;
807 let res = func();
807 let res = func();
808 self.exit_writing_context();
808 self.exit_writing_context();
809 Ok(res)
809 Ok(res)
810 }
810 }
811
811
812 /// `pub` only for use in hg-cpython
812 /// `pub` only for use in hg-cpython
813 #[doc(hidden)]
813 #[doc(hidden)]
814 pub fn exit_writing_context(&mut self) {
814 pub fn exit_writing_context(&mut self) {
815 self.writing_handles.take();
815 self.writing_handles.take();
816 self.segment_file.writing_handle.take();
816 self.segment_file.writing_handle.take();
817 self.segment_file.reading_handle.take();
817 self.segment_file.reading_handle.take();
818 }
818 }
819
819
820 /// `pub` only for use in hg-cpython
820 /// `pub` only for use in hg-cpython
821 #[doc(hidden)]
821 #[doc(hidden)]
822 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
822 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
823 self.writing_handles.as_ref()
823 self.writing_handles.as_ref()
824 }
824 }
825
825
826 /// `pub` only for use in hg-cpython
826 /// `pub` only for use in hg-cpython
827 #[doc(hidden)]
827 #[doc(hidden)]
828 pub fn enter_writing_context(
828 pub fn enter_writing_context(
829 &mut self,
829 &mut self,
830 data_end: Option<usize>,
830 data_end: Option<usize>,
831 transaction: &mut impl Transaction,
831 transaction: &mut impl Transaction,
832 ) -> Result<(), HgError> {
832 ) -> Result<(), HgError> {
833 let data_size = if self.is_empty() {
833 let data_size = if self.is_empty() {
834 0
834 0
835 } else {
835 } else {
836 self.end(Revision((self.len() - 1) as BaseRevision))
836 self.end(Revision((self.len() - 1) as BaseRevision))
837 };
837 };
838 let data_handle = if !self.is_inline() {
838 let data_handle = if !self.is_inline() {
839 let data_handle = match self.vfs.open_write(&self.data_file) {
839 let data_handle = match self.vfs.open_write(&self.data_file) {
840 Ok(mut f) => {
840 Ok(mut f) => {
841 if let Some(end) = data_end {
841 if let Some(end) = data_end {
842 f.seek(SeekFrom::Start(end as u64))
842 f.seek(SeekFrom::Start(end as u64))
843 .when_reading_file(&self.data_file)?;
843 .when_reading_file(&self.data_file)?;
844 } else {
844 } else {
845 f.seek(SeekFrom::End(0))
845 f.seek(SeekFrom::End(0))
846 .when_reading_file(&self.data_file)?;
846 .when_reading_file(&self.data_file)?;
847 }
847 }
848 f
848 f
849 }
849 }
850 Err(e) => match e {
850 Err(e) => match e {
851 HgError::IoError { error, context } => {
851 HgError::IoError { error, context } => {
852 if error.kind() != ErrorKind::NotFound {
852 if error.kind() != ErrorKind::NotFound {
853 return Err(HgError::IoError { error, context });
853 return Err(HgError::IoError { error, context });
854 }
854 }
855 self.vfs.create(&self.data_file, true)?
855 self.vfs.create(&self.data_file, true)?
856 }
856 }
857 e => return Err(e),
857 e => return Err(e),
858 },
858 },
859 };
859 };
860 transaction.add(&self.data_file, data_size);
860 transaction.add(&self.data_file, data_size);
861 Some(FileHandle::from_file(
861 Some(FileHandle::from_file(
862 data_handle,
862 data_handle,
863 dyn_clone::clone_box(&*self.vfs),
863 dyn_clone::clone_box(&*self.vfs),
864 &self.data_file,
864 &self.data_file,
865 ))
865 ))
866 } else {
866 } else {
867 None
867 None
868 };
868 };
869 let index_size = self.len() * INDEX_ENTRY_SIZE;
869 let index_size = self.len() * INDEX_ENTRY_SIZE;
870 let index_handle = self.index_write_handle()?;
870 let index_handle = self.index_write_handle()?;
871 if self.is_inline() {
871 if self.is_inline() {
872 transaction.add(&self.index_file, data_size);
872 transaction.add(&self.index_file, data_size);
873 } else {
873 } else {
874 transaction.add(&self.index_file, index_size);
874 transaction.add(&self.index_file, index_size);
875 }
875 }
876 self.writing_handles = Some(WriteHandles {
876 self.writing_handles = Some(WriteHandles {
877 index_handle: index_handle.clone(),
877 index_handle: index_handle.clone(),
878 data_handle: data_handle.clone(),
878 data_handle: data_handle.clone(),
879 });
879 });
880 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
880 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
881 Some(index_handle)
881 Some(index_handle)
882 } else {
882 } else {
883 data_handle
883 data_handle
884 };
884 };
885 Ok(())
885 Ok(())
886 }
886 }
887
887
888 /// Get a write handle to the index, sought to the end of its data.
888 /// Get a write handle to the index, sought to the end of its data.
889 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
889 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
890 let res = if self.delayed_buffer.is_none() {
890 let res = if self.delayed_buffer.is_none() {
891 if self.data_config.check_ambig {
891 if self.data_config.check_ambig {
892 self.vfs.open_check_ambig(&self.index_file)
892 self.vfs.open_check_ambig(&self.index_file)
893 } else {
893 } else {
894 self.vfs.open_write(&self.index_file)
894 self.vfs.open_write(&self.index_file)
895 }
895 }
896 } else {
896 } else {
897 self.vfs.open_write(&self.index_file)
897 self.vfs.open_write(&self.index_file)
898 };
898 };
899 match res {
899 match res {
900 Ok(mut handle) => {
900 Ok(mut handle) => {
901 handle
901 handle
902 .seek(SeekFrom::End(0))
902 .seek(SeekFrom::End(0))
903 .when_reading_file(&self.index_file)?;
903 .when_reading_file(&self.index_file)?;
904 Ok(
904 Ok(
905 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
905 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
906 {
906 {
907 FileHandle::from_file_delayed(
907 FileHandle::from_file_delayed(
908 handle,
908 handle,
909 dyn_clone::clone_box(&*self.vfs),
909 dyn_clone::clone_box(&*self.vfs),
910 &self.index_file,
910 &self.index_file,
911 delayed_buffer.clone(),
911 delayed_buffer.clone(),
912 )?
912 )?
913 } else {
913 } else {
914 FileHandle::from_file(
914 FileHandle::from_file(
915 handle,
915 handle,
916 dyn_clone::clone_box(&*self.vfs),
916 dyn_clone::clone_box(&*self.vfs),
917 &self.index_file,
917 &self.index_file,
918 )
918 )
919 },
919 },
920 )
920 )
921 }
921 }
922 Err(e) => match e {
922 Err(e) => match e {
923 HgError::IoError { error, context } => {
923 HgError::IoError { error, context } => {
924 if error.kind() != ErrorKind::NotFound {
924 if error.kind() != ErrorKind::NotFound {
925 return Err(HgError::IoError { error, context });
925 return Err(HgError::IoError { error, context });
926 };
926 };
927 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
927 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
928 {
928 {
929 FileHandle::new_delayed(
929 FileHandle::new_delayed(
930 dyn_clone::clone_box(&*self.vfs),
930 dyn_clone::clone_box(&*self.vfs),
931 &self.index_file,
931 &self.index_file,
932 true,
932 true,
933 delayed_buffer.clone(),
933 delayed_buffer.clone(),
934 )
934 )
935 } else {
935 } else {
936 FileHandle::new(
936 FileHandle::new(
937 dyn_clone::clone_box(&*self.vfs),
937 dyn_clone::clone_box(&*self.vfs),
938 &self.index_file,
938 &self.index_file,
939 true,
939 true,
940 true,
940 true,
941 )
941 )
942 }
942 }
943 }
943 }
944 e => Err(e),
944 e => Err(e),
945 },
945 },
946 }
946 }
947 }
947 }
948
948
    /// Split the data of an inline revlog into an index and a data file
    pub fn split_inline(
        &mut self,
        header: IndexHeader,
        new_index_file_path: Option<PathBuf>,
    ) -> Result<PathBuf, RevlogError> {
        assert!(self.delayed_buffer.is_none());
        let existing_handles = self.writing_handles.is_some();
        if let Some(handles) = &mut self.writing_handles {
            handles.index_handle.flush()?;
            self.writing_handles.take();
            self.segment_file.writing_handle.take();
        }
        let mut new_data_file_handle =
            self.vfs.create(&self.data_file, true)?;
        // Drop any potential data, possibly redundant with the VFS impl.
        new_data_file_handle
            .set_len(0)
            .when_writing_file(&self.data_file)?;

        self.with_read(|| -> Result<(), RevlogError> {
            for r in 0..self.index.len() {
                let rev = Revision(r as BaseRevision);
                let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
                new_data_file_handle
                    .write_all(&rev_segment)
                    .when_writing_file(&self.data_file)?;
            }
            new_data_file_handle
                .flush()
                .when_writing_file(&self.data_file)?;
            Ok(())
        })?;

        if let Some(index_path) = new_index_file_path {
            self.index_file = index_path
        }

        let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
        let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
        for r in 0..self.len() {
            let rev = Revision(r as BaseRevision);
            let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
                panic!(
                    "entry {} should exist in {}",
                    r,
                    self.index_file.display()
                )
            });
            if r == 0 {
                new_data.extend(header.header_bytes);
            }
            new_data.extend(entry);
        }
        new_index_handle
            .write_all(&new_data)
            .when_writing_file(&self.index_file)?;
        // Replace the index with a new one because the buffer contains inline
        // data
        self.index = Index::new(Box::new(new_data), header)?;
        self.inline = false;

        self.segment_file = RandomAccessFile::new(
            dyn_clone::clone_box(&*self.vfs),
            self.data_file.to_owned(),
        );
        if existing_handles {
            // Switched from inline to conventional, reopen the index
            let new_data_handle = Some(FileHandle::from_file(
                new_data_file_handle,
                dyn_clone::clone_box(&*self.vfs),
                &self.data_file,
            ));
            self.writing_handles = Some(WriteHandles {
                index_handle: self.index_write_handle()?,
                data_handle: new_data_handle.clone(),
            });
            *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
        }

        Ok(self.index_file.to_owned())
    }

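    // Illustrative call (an assumption, not in the original file): given a
    // suitable `header: IndexHeader`, a caller splits once the inline revlog
    // has grown too large, keeping the same index path:
    //
    //     let new_index_path = inner.split_inline(header, None)?;
    //     assert!(!inner.is_inline());
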
    /// Write a new entry to this revlog.
    /// - `entry` is the index bytes
    /// - `header_and_data` is the compression header and the revision data
    /// - `offset` is the position in the data file to write to
    /// - `index_end` is the overwritten position in the index in revlog-v2,
    ///   since the format may allow a rewrite of garbage data at the end.
    /// - `data_end` is the overwritten position in the data-file in
    ///   revlog-v2, since the format may allow a rewrite of garbage data at
    ///   the end.
    ///
    /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
    pub fn write_entry(
        &mut self,
        mut transaction: impl Transaction,
        entry: &[u8],
        header_and_data: (&[u8], &[u8]),
        mut offset: usize,
        index_end: Option<u64>,
        data_end: Option<u64>,
    ) -> Result<(u64, Option<u64>), HgError> {
        let current_revision = self.len() - 1;
        let canonical_index_file = self.canonical_index_file();

        let is_inline = self.is_inline();
        let handles = match &mut self.writing_handles {
            None => {
                return Err(HgError::abort(
                    "adding revision outside of the `with_write` context",
                    exit_codes::ABORT,
                    None,
                ));
            }
            Some(handles) => handles,
        };
        let index_handle = &mut handles.index_handle;
        let data_handle = &mut handles.data_handle;
        if let Some(end) = index_end {
            index_handle
                .seek(SeekFrom::Start(end))
                .when_reading_file(&self.index_file)?;
        } else {
            index_handle
                .seek(SeekFrom::End(0))
                .when_reading_file(&self.index_file)?;
        }
        if let Some(data_handle) = data_handle {
            if let Some(end) = data_end {
                data_handle
                    .seek(SeekFrom::Start(end))
                    .when_reading_file(&self.data_file)?;
            } else {
                data_handle
                    .seek(SeekFrom::End(0))
                    .when_reading_file(&self.data_file)?;
            }
        }
        let (header, data) = header_and_data;

        if !is_inline {
            transaction.add(&self.data_file, offset);
            transaction
                .add(&canonical_index_file, current_revision * entry.len());
            let data_handle = data_handle
                .as_mut()
                .expect("data handle should exist when not inline");
            if !header.is_empty() {
                data_handle.write_all(header)?;
            }
            data_handle.write_all(data)?;
            match &mut self.delayed_buffer {
                Some(buf) => {
                    buf.lock()
                        .expect("propagate the panic")
                        .buffer
                        .write_all(entry)
                        .expect("write to delay buffer should succeed");
                }
                None => index_handle.write_all(entry)?,
            }
        } else if self.delayed_buffer.is_some() {
            return Err(HgError::abort(
                "invalid delayed write on inline revlog",
                exit_codes::ABORT,
                None,
            ));
        } else {
            offset += current_revision * entry.len();
            transaction.add(&canonical_index_file, offset);
            index_handle.write_all(entry)?;
            index_handle.write_all(header)?;
            index_handle.write_all(data)?;
        }
        let data_position = match data_handle {
            Some(h) => Some(h.position()?),
            None => None,
        };
        Ok((index_handle.position()?, data_position))
    }

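    // Illustrative call (an assumption, not in the original file): appending
    // a new revision at the end of both files, with no v2-style overwrite:
    //
    //     let (index_pos, data_pos) = inner.write_entry(
    //         transaction,
    //         &entry_bytes,
    //         (&compression_header, &compressed_data),
    //         data_offset,
    //         None, // index_end: append to the index
    //         None, // data_end: append to the data file
    //     )?;
    //
    // `inner`, `transaction` and the byte buffers are hypothetical names.
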
    /// Return the real target index file and not the temporary when diverting
    pub fn canonical_index_file(&self) -> PathBuf {
        self.original_index_file
            .as_ref()
            .map(ToOwned::to_owned)
            .unwrap_or_else(|| self.index_file.to_owned())
    }

    /// Return the path to the diverted index
    fn diverted_index(&self) -> PathBuf {
        self.index_file.with_extension("i.a")
    }
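    // For example (illustrative, not in the original file): an index named
    // "00changelog.i" is diverted to "00changelog.i.a", the conventional
    // suffix for a pending index file.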

    /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
    pub fn is_open(&self) -> bool {
        self.segment_file.is_open()
    }

    /// Set this revlog to delay its writes to a buffer
    pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        if self.delayed_buffer.is_some() || self.original_index_file.is_some()
        {
            // Delay or divert already happening
            return Ok(None);
        }
        if self.is_empty() {
            self.original_index_file = Some(self.index_file.to_owned());
            self.index_file = self.diverted_index();
            if self.vfs.exists(&self.index_file) {
                self.vfs.unlink(&self.index_file)?;
            }
            Ok(Some(self.index_file.to_owned()))
        } else {
            self.delayed_buffer =
                Some(Arc::new(Mutex::new(DelayedBuffer::default())));
            Ok(None)
        }
    }

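    // Illustrative summary (not in the original file): an empty revlog is
    // *diverted* (all writes go straight to the temporary "<name>.i.a"
    // file), while a non-empty one is *delayed* (new index entries
    // accumulate in an in-memory `DelayedBuffer`).
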
    /// Write the pending data (in memory), if any, to the diverted index
    /// file (an on-disk temporary file).
    pub fn write_pending(
        &mut self,
    ) -> Result<(Option<PathBuf>, bool), HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        if self.original_index_file.is_some() {
            return Ok((None, true));
        }
        let mut any_pending = false;
        let pending_index_file = self.diverted_index();
        if self.vfs.exists(&pending_index_file) {
            self.vfs.unlink(&pending_index_file)?;
        }
        self.vfs.copy(&self.index_file, &pending_index_file)?;
        if let Some(delayed_buffer) = self.delayed_buffer.take() {
            let mut index_file_handle =
                self.vfs.open_write(&pending_index_file)?;
            index_file_handle
                .seek(SeekFrom::End(0))
                .when_writing_file(&pending_index_file)?;
            let delayed_data =
                &delayed_buffer.lock().expect("propagate the panic").buffer;
            index_file_handle
                .write_all(delayed_data)
                .when_writing_file(&pending_index_file)?;
            any_pending = true;
        }
        self.original_index_file = Some(self.index_file.to_owned());
        self.index_file = pending_index_file;
        Ok((Some(self.index_file.to_owned()), any_pending))
    }

    /// Overwrite the canonical file with the diverted file, or write out the
    /// delayed buffer.
    /// Returns an error if the revlog is neither diverted nor delayed.
    pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
        assert!(!self.is_open());
        if self.is_inline() {
            return Err(HgError::abort(
                "revlog with delayed write should not be inline",
                exit_codes::ABORT,
                None,
            ));
        }
        match (
            self.delayed_buffer.as_ref(),
            self.original_index_file.as_ref(),
        ) {
            (None, None) => {
                return Err(HgError::abort(
                    "neither delay nor divert found on this revlog",
                    exit_codes::ABORT,
                    None,
                ));
            }
            (Some(delay), None) => {
                let mut index_file_handle =
                    self.vfs.open_write(&self.index_file)?;
                index_file_handle
                    .seek(SeekFrom::End(0))
                    .when_writing_file(&self.index_file)?;
                index_file_handle
                    .write_all(
                        &delay.lock().expect("propagate the panic").buffer,
                    )
                    .when_writing_file(&self.index_file)?;
                self.delayed_buffer = None;
            }
            (None, Some(divert)) => {
                if self.vfs.exists(&self.index_file) {
                    self.vfs.rename(&self.index_file, divert, true)?;
                }
                divert.clone_into(&mut self.index_file);
                self.original_index_file = None;
            }
            (Some(_), Some(_)) => unreachable!(
                "{} is in an inconsistent state of both delay and divert",
                self.canonical_index_file().display(),
            ),
        }
        Ok(self.canonical_index_file())
    }

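    // Illustrative call order (an assumption drawn from the methods above,
    // not from the original file): transaction code is expected to drive the
    // delay/divert cycle roughly as
    //
    //     revlog.delay()?;             // start buffering or diverting
    //     // ... revisions are written ...
    //     revlog.write_pending()?;     // expose the pending "<name>.i.a"
    //     revlog.finalize_pending()?;  // fold changes into the real index
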
    /// `pub` only for `hg-cpython`. This is made a different method than
    /// [`Revlog::index`] in case there is a different invariant that pops up
    /// later.
    #[doc(hidden)]
    pub fn shared_index(&self) -> &Index {
        &self.index
    }
}

/// The use of a [`RefCell`] assumes that a given revlog will only
/// be accessed (read or write) by a single thread.
type UncompressedChunkCache =
    RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;

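// Illustrative construction (not in the original file; assumes `schnellru`'s
// `ByMemoryUsage::new` takes a byte budget):
//
//     let cache: UncompressedChunkCache =
//         RefCell::new(LruMap::new(ByMemoryUsage::new(16 * 1024 * 1024)));
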
/// The node, revision and data for the last revision we've seen. Speeds up
/// a lot of sequential operations of the revlog.
///
/// The data is not just bytes since it can come from Python and we want to
/// avoid copies if possible.
type SingleRevisionCache =
    (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);

/// A way of progressively filling a buffer with revision data, then returning
/// that buffer. Used to abstract away Python-allocated buffers to reduce
/// copying for performance reasons.
pub trait RevisionBuffer {
    /// The owned buffer type to return
    type Target;
    /// Copies the slice into the buffer
    fn extend_from_slice(&mut self, slice: &[u8]);
    /// Returns the now finished owned buffer
    fn finish(self) -> Self::Target;
}

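// A minimal sketch (not in the original file) showing that any owned buffer
// can implement the trait; `PlainBuffer` is a hypothetical example type.
#[cfg(test)]
mod revision_buffer_example {
    use super::*;

    struct PlainBuffer(Vec<u8>);

    impl RevisionBuffer for PlainBuffer {
        type Target = Vec<u8>;

        fn extend_from_slice(&mut self, slice: &[u8]) {
            self.0.extend_from_slice(slice);
        }

        fn finish(self) -> Self::Target {
            self.0
        }
    }

    #[test]
    fn plain_buffer_roundtrip() {
        let mut buf = PlainBuffer(Vec::new());
        buf.extend_from_slice(b"abc");
        assert_eq!(buf.finish(), b"abc".to_vec());
    }
}
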
/// A simple vec-based buffer. This is uselessly complicated for the pure Rust
/// case, but it's the price to pay for Python compatibility.
#[derive(Debug)]
pub(super) struct CoreRevisionBuffer {
    buf: Vec<u8>,
}

impl CoreRevisionBuffer {
    pub fn new() -> Self {
        Self { buf: vec![] }
    }

    #[inline]
    pub fn resize(&mut self, size: usize) {
        // `saturating_sub` avoids an underflow panic when the buffer has
        // already reserved more than `size`.
        self.buf
            .reserve_exact(size.saturating_sub(self.buf.capacity()));
    }
}

impl RevisionBuffer for CoreRevisionBuffer {
    type Target = Vec<u8>;

    #[inline]
    fn extend_from_slice(&mut self, slice: &[u8]) {
        self.buf.extend_from_slice(slice);
    }

    #[inline]
    fn finish(self) -> Self::Target {
        self.buf
    }
}

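// A tiny usage check (not in the original file): chunks are appended in
// order and `finish` hands back the underlying `Vec<u8>`.
#[cfg(test)]
mod core_revision_buffer_example {
    use super::*;

    #[test]
    fn assembles_chunks_in_order() {
        let mut buf = CoreRevisionBuffer::new();
        // Pre-allocate for the expected revision size (five bytes here).
        buf.resize(5);
        buf.extend_from_slice(b"he");
        buf.extend_from_slice(b"llo");
        assert_eq!(buf.finish(), b"hello".to_vec());
    }
}
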
/// Calculate the hash of a revision given its data and its parents.
pub fn hash(
    data: &[u8],
    p1_hash: &[u8],
    p2_hash: &[u8],
) -> [u8; NODE_BYTES_LENGTH] {
    let mut hasher = Sha1::new();
    let (a, b) = (p1_hash, p2_hash);
    if a > b {
        hasher.update(b);
        hasher.update(a);
    } else {
        hasher.update(a);
        hasher.update(b);
    }
    hasher.update(data);
    *hasher.finalize().as_ref()
}
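
// A minimal sanity check (not in the original file): parent hashes are fed
// to SHA-1 in sorted order, so swapping `p1` and `p2` must not change the
// resulting node.
#[cfg(test)]
mod hash_example {
    use super::*;

    #[test]
    fn parent_order_does_not_matter() {
        let p1 = [1u8; NODE_BYTES_LENGTH];
        let p2 = [2u8; NODE_BYTES_LENGTH];
        assert_eq!(hash(b"data", &p1, &p2), hash(b"data", &p2, &p1));
    }
}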