revlog: move censoring code in a dedicated module...
marmoute
r48183:33d62691 default
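In short: revlog.censorrevision() keeps its format dispatch, but the version-1 rewrite logic now lives in the new mercurial/revlogutils/censor.py module. A minimal sketch of the new call path; `rl` (a revlog) and `tr` (an open transaction) are assumed to already exist, and the wrapper function name is illustrative:

    from mercurial.revlogutils import censor

    def censor_one_revision(rl, tr, censornode):
        # After this changeset, the v1 copy-and-swap censoring logic is
        # reached through revlogutils.censor rather than inline code in
        # revlog.censorrevision().
        censor.v1_censor(rl, tr, censornode, tombstone=b'redacted')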
mercurial/revlog.py
@@ -80,6 +80,7 @@ from .interfaces import (
     util as interfaceutil,
 )
 from .revlogutils import (
+    censor,
     deltas as deltautil,
     docket as docketutil,
     flagutil,
@@ -3232,88 +3233,15 @@ class revlog(object):
                 _(b'cannot censor with version %d revlogs')
                 % self._format_version
             )
-
-        censorrev = self.rev(censornode)
-        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-        if len(tombstone) > self.rawsize(censorrev):
-            raise error.Abort(
-                _(b'censor tombstone must be no longer than censored data')
-            )
+        elif self._format_version == REVLOGV1:
+            censor.v1_censor(self, tr, censornode, tombstone)
+        else:
+            # revlog v2
+            raise error.RevlogError(
+                _(b'cannot censor with version %d revlogs')
+                % self._format_version
+            )
 
-        # Rewriting the revlog in place is hard. Our strategy for censoring is
-        # to create a new revlog, copy all revisions to it, then replace the
-        # revlogs on transaction close.
-        #
-        # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(
-            self.opener,
-            target=self.target,
-            radix=self.radix,
-            postfix=b'tmpcensored',
-            censorable=True,
-        )
-        newrl._format_version = self._format_version
-        newrl._format_flags = self._format_flags
-        newrl._generaldelta = self._generaldelta
-        newrl._parse_index = self._parse_index
-
-        for rev in self.revs():
-            node = self.node(rev)
-            p1, p2 = self.parents(node)
-
-            if rev == censorrev:
-                newrl.addrawrevision(
-                    tombstone,
-                    tr,
-                    self.linkrev(censorrev),
-                    p1,
-                    p2,
-                    censornode,
-                    REVIDX_ISCENSORED,
-                )
-
-                if newrl.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'censored revision stored as delta; '
-                            b'cannot censor'
-                        ),
-                        hint=_(
-                            b'censoring of revlogs is not '
-                            b'fully implemented; please report '
-                            b'this bug'
-                        ),
-                    )
-                continue
-
-            if self.iscensored(rev):
-                if self.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'cannot censor due to censored '
-                            b'revision having delta stored'
-                        )
-                    )
-                rawtext = self._chunk(rev)
-            else:
-                rawtext = self.rawdata(rev)
-
-            newrl.addrawrevision(
-                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
-            )
-
-        tr.addbackup(self._indexfile, location=b'store')
-        if not self._inline:
-            tr.addbackup(self._datafile, location=b'store')
-
-        self.opener.rename(newrl._indexfile, self._indexfile)
-        if not self._inline:
-            self.opener.rename(newrl._datafile, self._datafile)
-
-        self.clearcaches()
-        self._loadindex()
-
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
 
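Both before and after the move, the tombstone is wrapped in filelog metadata (storageutil.packmeta) before its length is checked against the censored revision's raw size. A simplified sketch of that framing, assuming the same behavior as the real helper in mercurial/utils/storageutil.py:

    def packmeta_sketch(meta, text):
        # Filelog metadata framing: "key: value" lines enclosed by a pair
        # of \x01\n markers, followed by the payload.
        metatext = b''.join(
            b'%s: %s\n' % (k, v) for k, v in sorted(meta.items())
        )
        return b'\x01\n%s\x01\n%s' % (metatext, text)

    # packmeta_sketch({b'censored': b'reason'}, b'') builds the tombstone
    # text that replaces the censored revision's data; as the abort above
    # enforces, it must be no longer than the data it replaces.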
mercurial/revlogutils/censor.py (new file; the viewer renders it as a copy of mercurial/revlog.py): this diff has been collapsed as it changes many lines (3583 lines changed).
@@ -1,3535 +1,102 b''
1 # revlog.py - storage back-end for mercurial
1 # censor code related to censoring revision
2 # coding: utf8
3 #
2 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 from ..node import (
10
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
13 """
14
15 from __future__ import absolute_import
16
17 import binascii
18 import collections
19 import contextlib
20 import errno
21 import io
22 import os
23 import struct
24 import zlib
25
26 # import stuff from node for others to import from revlog
27 from .node import (
28 bin,
29 hex,
30 nullrev,
10 nullrev,
31 sha1nodeconstants,
32 short,
33 wdirrev,
34 )
35 from .i18n import _
36 from .pycompat import getattr
37 from .revlogutils.constants import (
38 ALL_KINDS,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
48 REVLOGV0,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
57 )
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
67 from .thirdparty import attr
68 from . import (
69 ancestor,
70 dagop,
71 error,
72 mdiff,
73 policy,
74 pycompat,
75 templatefilters,
76 util,
77 )
78 from .interfaces import (
79 repository,
80 util as interfaceutil,
81 )
82 from .revlogutils import (
83 deltas as deltautil,
84 docket as docketutil,
85 flagutil,
86 nodemap as nodemaputil,
87 revlogv0,
88 sidedata as sidedatautil,
89 )
90 from .utils import (
91 storageutil,
92 stringutil,
93 )
94
95 # blanked usage of all the name to prevent pyflakes constraints
96 # We need these name available in the module for extensions.
97
98 REVLOGV0
99 REVLOGV1
100 REVLOGV2
101 FLAG_INLINE_DATA
102 FLAG_GENERALDELTA
103 REVLOG_DEFAULT_FLAGS
104 REVLOG_DEFAULT_FORMAT
105 REVLOG_DEFAULT_VERSION
106 REVLOGV1_FLAGS
107 REVLOGV2_FLAGS
108 REVIDX_ISCENSORED
109 REVIDX_ELLIPSIS
110 REVIDX_HASCOPIESINFO
111 REVIDX_EXTSTORED
112 REVIDX_DEFAULT_FLAGS
113 REVIDX_FLAGS_ORDER
114 REVIDX_RAWTEXT_CHANGING_FLAGS
115
116 parsers = policy.importmod('parsers')
117 rustancestor = policy.importrust('ancestor')
118 rustdagop = policy.importrust('dagop')
119 rustrevlog = policy.importrust('revlog')
120
121 # Aliased for performance.
122 _zlibdecompress = zlib.decompress
123
124 # max size of revlog with inline data
125 _maxinline = 131072
126 _chunksize = 1048576
127
128 # Flag processors for REVIDX_ELLIPSIS.
129 def ellipsisreadprocessor(rl, text):
130 return text, False
131
132
133 def ellipsiswriteprocessor(rl, text):
134 return text, False
135
136
137 def ellipsisrawprocessor(rl, text):
138 return False
139
140
141 ellipsisprocessor = (
142 ellipsisreadprocessor,
143 ellipsiswriteprocessor,
144 ellipsisrawprocessor,
145 )
11 )
146
12 from ..i18n import _
147
13 from .. import (
148 def offset_type(offset, type):
14 error,
149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 raise ValueError(b'unknown revlog index flags')
151 return int(int(offset) << 16 | type)
152
153
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
157 if skipflags:
158 state[b'skipread'].add(node)
159 else:
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
162
163
164 # True if a fast implementation for persistent-nodemap is available
165 #
166 # We also consider we have a "fast" implementation in "pure" python because
167 # people using pure don't really have performance consideration (and a
168 # wheelbarrow of other slowness source)
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 parsers, 'BaseIndexObject'
171 )
15 )
172
16 from ..utils import (
173
17 storageutil,
174 @attr.s(slots=True, frozen=True)
175 class _revisioninfo(object):
176 """Information about a revision that allows building its fulltext
177 node: expected hash of the revision
178 p1, p2: parent revs of the revision
179 btext: built text cache consisting of a one-element list
180 cachedelta: (baserev, uncompressed_delta) or None
181 flags: flags associated to the revision storage
182
183 One of btext[0] or cachedelta must be set.
184 """
185
186 node = attr.ib()
187 p1 = attr.ib()
188 p2 = attr.ib()
189 btext = attr.ib()
190 textlen = attr.ib()
191 cachedelta = attr.ib()
192 flags = attr.ib()
193
194
195 @interfaceutil.implementer(repository.irevisiondelta)
196 @attr.s(slots=True)
197 class revlogrevisiondelta(object):
198 node = attr.ib()
199 p1node = attr.ib()
200 p2node = attr.ib()
201 basenode = attr.ib()
202 flags = attr.ib()
203 baserevisionsize = attr.ib()
204 revision = attr.ib()
205 delta = attr.ib()
206 sidedata = attr.ib()
207 protocol_flags = attr.ib()
208 linknode = attr.ib(default=None)
209
210
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
214 warning = attr.ib(default=None)
215 error = attr.ib(default=None)
216 node = attr.ib(default=None)
217
218
219 def parse_index_v1(data, inline):
220 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline)
222 return index, cache
223
224
225 def parse_index_v2(data, inline):
226 # call the C implementation to parse the index data
227 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
228 return index, cache
229
230
231 def parse_index_cl_v2(data, inline):
232 # call the C implementation to parse the index data
233 assert not inline
234 from .pure.parsers import parse_index_cl_v2
235
236 index, cache = parse_index_cl_v2(data)
237 return index, cache
238
239
240 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
241
242 def parse_index_v1_nodemap(data, inline):
243 index, cache = parsers.parse_index_devel_nodemap(data, inline)
244 return index, cache
245
246
247 else:
248 parse_index_v1_nodemap = None
249
250
251 def parse_index_v1_mixed(data, inline):
252 index, cache = parse_index_v1(data, inline)
253 return rustrevlog.MixedIndex(index), cache
254
255
256 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
257 # signed integer)
258 _maxentrysize = 0x7FFFFFFF
259
260 PARTIAL_READ_MSG = _(
261 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
262 )
18 )
263
19 from . import constants
264 FILE_TOO_SHORT_MSG = _(
265 b'cannot read from revlog %s;'
266 b' expected %d bytes from offset %d, data size is %d'
267 )
268
269
270 class revlog(object):
271 """
272 the underlying revision storage object
273
274 A revlog consists of two parts, an index and the revision data.
275
276 The index is a file with a fixed record size containing
277 information on each revision, including its nodeid (hash), the
278 nodeids of its parents, the position and offset of its data within
279 the data file, and the revision it's based on. Finally, each entry
280 contains a linkrev entry that can serve as a pointer to external
281 data.
282
283 The revision data itself is a linear collection of data chunks.
284 Each chunk represents a revision and is usually represented as a
285 delta against the previous chunk. To bound lookup time, runs of
286 deltas are limited to about 2 times the length of the original
287 version data. This makes retrieval of a version proportional to
288 its size, or O(1) relative to the number of revisions.
289
290 Both pieces of the revlog are written to in an append-only
291 fashion, which means we never need to rewrite a file to insert or
292 remove data, and can use some simple techniques to avoid the need
293 for locking while reading.
294
295 If checkambig, indexfile is opened with checkambig=True at
296 writing, to avoid file stat ambiguity.
297
298 If mmaplargeindex is True, and an mmapindexthreshold is set, the
299 index will be mmapped rather than read if it is larger than the
300 configured threshold.
301
302 If censorable is True, the revlog can have censored revisions.
303
304 If `upperboundcomp` is not None, this is the expected maximal gain from
305 compression for the data content.
306
307 `concurrencychecker` is an optional function that receives 3 arguments: a
308 file handle, a filename, and an expected position. It should check whether
309 the current position in the file handle is valid, and log/warn/fail (by
310 raising).
311
20
312
21
313 Internal details
22 def v1_censor(rl, tr, censornode, tombstone=b''):
314 ----------------
23 """censors a revision in a "version 1" revlog"""
315
24 assert rl._format_version == constants.REVLOGV1, rl._format_version
316 A large part of the revlog logic deals with revisions' "index entries", tuple
317 objects that contains the same "items" whatever the revlog version.
318 Different versions will have different ways of storing these items (sometimes
319 not having them at all), but the tuple will always be the same. New fields
320 are usually added at the end to avoid breaking existing code that relies
321 on the existing order. The field are defined as follows:
322
323 [0] offset:
324 The byte index of the start of revision data chunk.
325 That value is shifted up by 16 bits. use "offset = field >> 16" to
326 retrieve it.
327
328 flags:
329 A flag field that carries special information or changes the behavior
330 of the revision. (see `REVIDX_*` constants for details)
331 The flag field only occupies the first 16 bits of this field,
332 use "flags = field & 0xFFFF" to retrieve the value.
333
334 [1] compressed length:
335 The size, in bytes, of the chunk on disk
336
337 [2] uncompressed length:
338 The size, in bytes, of the full revision once reconstructed.
339
340 [3] base rev:
341 Either the base of the revision delta chain (without general
342 delta), or the base of the delta (stored in the data chunk)
343 with general delta.
344
345 [4] link rev:
346 Changelog revision number of the changeset introducing this
347 revision.
348
349 [5] parent 1 rev:
350 Revision number of the first parent
351
352 [6] parent 2 rev:
353 Revision number of the second parent
354
355 [7] node id:
356 The node id of the current revision
357
358 [8] sidedata offset:
359 The byte index of the start of the revision's side-data chunk.
360
361 [9] sidedata chunk length:
362 The size, in bytes, of the revision's side-data chunk.
363
364 [10] data compression mode:
365 two bits that detail the way the data chunk is compressed on disk.
366 (see "COMP_MODE_*" constants for details). For revlog version 0 and
367 1 this will always be COMP_MODE_INLINE.
368
369 [11] side-data compression mode:
370 two bits that detail the way the sidedata chunk is compressed on disk.
371 (see "COMP_MODE_*" constants for details)
372 """
373
374 _flagserrorclass = error.RevlogError
375
376 def __init__(
377 self,
378 opener,
379 target,
380 radix,
381 postfix=None, # only exist for `tmpcensored` now
382 checkambig=False,
383 mmaplargeindex=False,
384 censorable=False,
385 upperboundcomp=None,
386 persistentnodemap=False,
387 concurrencychecker=None,
388 trypending=False,
389 ):
390 """
391 create a revlog object
392
393 opener is a function that abstracts the file opening operation
394 and can be used to implement COW semantics or the like.
395
396 `target`: a (KIND, ID) tuple that identify the content stored in
397 this revlog. It help the rest of the code to understand what the revlog
398 is about without having to resort to heuristic and index filename
399 analysis. Note: that this must be reliably be set by normal code, but
400 that test, debug, or performance measurement code might not set this to
401 accurate value.
402 """
403 self.upperboundcomp = upperboundcomp
404
405 self.radix = radix
406
407 self._docket_file = None
408 self._indexfile = None
409 self._datafile = None
410 self._sidedatafile = None
411 self._nodemap_file = None
412 self.postfix = postfix
413 self._trypending = trypending
414 self.opener = opener
415 if persistentnodemap:
416 self._nodemap_file = nodemaputil.get_nodemap_file(self)
417
25
418 assert target[0] in ALL_KINDS
26 # avoid cycle
419 assert len(target) == 2
27 from .. import revlog
420 self.target = target
421 # When True, indexfile is opened with checkambig=True at writing, to
422 # avoid file stat ambiguity.
423 self._checkambig = checkambig
424 self._mmaplargeindex = mmaplargeindex
425 self._censorable = censorable
426 # 3-tuple of (node, rev, text) for a raw revision.
427 self._revisioncache = None
428 # Maps rev to chain base rev.
429 self._chainbasecache = util.lrucachedict(100)
430 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
431 self._chunkcache = (0, b'')
432 # How much data to read and cache into the raw revlog data cache.
433 self._chunkcachesize = 65536
434 self._maxchainlen = None
435 self._deltabothparents = True
436 self.index = None
437 self._docket = None
438 self._nodemap_docket = None
439 # Mapping of partial identifiers to full nodes.
440 self._pcache = {}
441 # Mapping of revision integer to full node.
442 self._compengine = b'zlib'
443 self._compengineopts = {}
444 self._maxdeltachainspan = -1
445 self._withsparseread = False
446 self._sparserevlog = False
447 self.hassidedata = False
448 self._srdensitythreshold = 0.50
449 self._srmingapsize = 262144
450
451 # Make copy of flag processors so each revlog instance can support
452 # custom flags.
453 self._flagprocessors = dict(flagutil.flagprocessors)
454
455 # 3-tuple of file handles being used for active writing.
456 self._writinghandles = None
457 # prevent nesting of addgroup
458 self._adding_group = None
459
460 self._loadindex()
461
462 self._concurrencychecker = concurrencychecker
463
464 def _init_opts(self):
465 """process options (from above/config) to setup associated default revlog mode
466
467 These values might be affected when actually reading on disk information.
468
469 The relevant values are returned for use in _loadindex().
470
471 * newversionflags:
472 version header to use if we need to create a new revlog
473
474 * mmapindexthreshold:
475 minimal index size for start to use mmap
476
477 * force_nodemap:
478 force the usage of a "development" version of the nodemap code
479 """
480 mmapindexthreshold = None
481 opts = self.opener.options
482
483 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
484 new_header = CHANGELOGV2
485 elif b'revlogv2' in opts:
486 new_header = REVLOGV2
487 elif b'revlogv1' in opts:
488 new_header = REVLOGV1 | FLAG_INLINE_DATA
489 if b'generaldelta' in opts:
490 new_header |= FLAG_GENERALDELTA
491 elif b'revlogv0' in self.opener.options:
492 new_header = REVLOGV0
493 else:
494 new_header = REVLOG_DEFAULT_VERSION
495
496 if b'chunkcachesize' in opts:
497 self._chunkcachesize = opts[b'chunkcachesize']
498 if b'maxchainlen' in opts:
499 self._maxchainlen = opts[b'maxchainlen']
500 if b'deltabothparents' in opts:
501 self._deltabothparents = opts[b'deltabothparents']
502 self._lazydelta = bool(opts.get(b'lazydelta', True))
503 self._lazydeltabase = False
504 if self._lazydelta:
505 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
506 if b'compengine' in opts:
507 self._compengine = opts[b'compengine']
508 if b'zlib.level' in opts:
509 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
510 if b'zstd.level' in opts:
511 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
512 if b'maxdeltachainspan' in opts:
513 self._maxdeltachainspan = opts[b'maxdeltachainspan']
514 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
515 mmapindexthreshold = opts[b'mmapindexthreshold']
516 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
517 withsparseread = bool(opts.get(b'with-sparse-read', False))
518 # sparse-revlog forces sparse-read
519 self._withsparseread = self._sparserevlog or withsparseread
520 if b'sparse-read-density-threshold' in opts:
521 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
522 if b'sparse-read-min-gap-size' in opts:
523 self._srmingapsize = opts[b'sparse-read-min-gap-size']
524 if opts.get(b'enableellipsis'):
525 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
526
527 # revlog v0 doesn't have flag processors
528 for flag, processor in pycompat.iteritems(
529 opts.get(b'flagprocessors', {})
530 ):
531 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
532
533 if self._chunkcachesize <= 0:
534 raise error.RevlogError(
535 _(b'revlog chunk cache size %r is not greater than 0')
536 % self._chunkcachesize
537 )
538 elif self._chunkcachesize & (self._chunkcachesize - 1):
539 raise error.RevlogError(
540 _(b'revlog chunk cache size %r is not a power of 2')
541 % self._chunkcachesize
542 )
543 force_nodemap = opts.get(b'devel-force-nodemap', False)
544 return new_header, mmapindexthreshold, force_nodemap
545
28
546 def _get_data(self, filepath, mmap_threshold, size=None):
29 censorrev = rl.rev(censornode)
547 """return a file content with or without mmap
30 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
548
549 If the file is missing return the empty string"""
550 try:
551 with self.opener(filepath) as fp:
552 if mmap_threshold is not None:
553 file_size = self.opener.fstat(fp).st_size
554 if file_size >= mmap_threshold:
555 if size is not None:
556 # avoid potentiel mmap crash
557 size = min(file_size, size)
558 # TODO: should .close() to release resources without
559 # relying on Python GC
560 if size is None:
561 return util.buffer(util.mmapread(fp))
562 else:
563 return util.buffer(util.mmapread(fp, size))
564 if size is None:
565 return fp.read()
566 else:
567 return fp.read(size)
568 except IOError as inst:
569 if inst.errno != errno.ENOENT:
570 raise
571 return b''
572
573 def _loadindex(self):
574
575 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
576
577 if self.postfix is not None:
578 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
579 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
580 entry_point = b'%s.i.a' % self.radix
581 else:
582 entry_point = b'%s.i' % self.radix
583
584 entry_data = b''
585 self._initempty = True
586 entry_data = self._get_data(entry_point, mmapindexthreshold)
587 if len(entry_data) > 0:
588 header = INDEX_HEADER.unpack(entry_data[:4])[0]
589 self._initempty = False
590 else:
591 header = new_header
592
593 self._format_flags = header & ~0xFFFF
594 self._format_version = header & 0xFFFF
595
596 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
597 if supported_flags is None:
598 msg = _(b'unknown version (%d) in revlog %s')
599 msg %= (self._format_version, self.display_id)
600 raise error.RevlogError(msg)
601 elif self._format_flags & ~supported_flags:
602 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
603 display_flag = self._format_flags >> 16
604 msg %= (display_flag, self._format_version, self.display_id)
605 raise error.RevlogError(msg)
606
607 features = FEATURES_BY_VERSION[self._format_version]
608 self._inline = features[b'inline'](self._format_flags)
609 self._generaldelta = features[b'generaldelta'](self._format_flags)
610 self.hassidedata = features[b'sidedata']
611
612 if not features[b'docket']:
613 self._indexfile = entry_point
614 index_data = entry_data
615 else:
616 self._docket_file = entry_point
617 if self._initempty:
618 self._docket = docketutil.default_docket(self, header)
619 else:
620 self._docket = docketutil.parse_docket(
621 self, entry_data, use_pending=self._trypending
622 )
623 self._indexfile = self._docket.index_filepath()
624 index_data = b''
625 index_size = self._docket.index_end
626 if index_size > 0:
627 index_data = self._get_data(
628 self._indexfile, mmapindexthreshold, size=index_size
629 )
630 if len(index_data) < index_size:
631 msg = _(b'too few index data for %s: got %d, expected %d')
632 msg %= (self.display_id, len(index_data), index_size)
633 raise error.RevlogError(msg)
634
635 self._inline = False
636 # generaldelta implied by version 2 revlogs.
637 self._generaldelta = True
638 # the logic for persistent nodemap will be dealt with within the
639 # main docket, so disable it for now.
640 self._nodemap_file = None
641
642 if self._docket is not None:
643 self._datafile = self._docket.data_filepath()
644 self._sidedatafile = self._docket.sidedata_filepath()
645 elif self.postfix is None:
646 self._datafile = b'%s.d' % self.radix
647 else:
648 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
649
650 self.nodeconstants = sha1nodeconstants
651 self.nullid = self.nodeconstants.nullid
652
653 # sparse-revlog can't be on without general-delta (issue6056)
654 if not self._generaldelta:
655 self._sparserevlog = False
656
657 self._storedeltachains = True
658
31
659 devel_nodemap = (
32 if len(tombstone) > rl.rawsize(censorrev):
660 self._nodemap_file
33 raise error.Abort(
661 and force_nodemap
34 _(b'censor tombstone must be no longer than censored data')
662 and parse_index_v1_nodemap is not None
663 )
664
665 use_rust_index = False
666 if rustrevlog is not None:
667 if self._nodemap_file is not None:
668 use_rust_index = True
669 else:
670 use_rust_index = self.opener.options.get(b'rust.index')
671
672 self._parse_index = parse_index_v1
673 if self._format_version == REVLOGV0:
674 self._parse_index = revlogv0.parse_index_v0
675 elif self._format_version == REVLOGV2:
676 self._parse_index = parse_index_v2
677 elif self._format_version == CHANGELOGV2:
678 self._parse_index = parse_index_cl_v2
679 elif devel_nodemap:
680 self._parse_index = parse_index_v1_nodemap
681 elif use_rust_index:
682 self._parse_index = parse_index_v1_mixed
683 try:
684 d = self._parse_index(index_data, self._inline)
685 index, _chunkcache = d
686 use_nodemap = (
687 not self._inline
688 and self._nodemap_file is not None
689 and util.safehasattr(index, 'update_nodemap_data')
690 )
691 if use_nodemap:
692 nodemap_data = nodemaputil.persisted_data(self)
693 if nodemap_data is not None:
694 docket = nodemap_data[0]
695 if (
696 len(d[0]) > docket.tip_rev
697 and d[0][docket.tip_rev][7] == docket.tip_node
698 ):
699 # no changelog tampering
700 self._nodemap_docket = docket
701 index.update_nodemap_data(*nodemap_data)
702 except (ValueError, IndexError):
703 raise error.RevlogError(
704 _(b"index %s is corrupted") % self.display_id
705 )
706 self.index, self._chunkcache = d
707 if not self._chunkcache:
708 self._chunkclear()
709 # revnum -> (chain-length, sum-delta-length)
710 self._chaininfocache = util.lrucachedict(500)
711 # revlog header -> revlog compressor
712 self._decompressors = {}
713
714 @util.propertycache
715 def revlog_kind(self):
716 return self.target[0]
717
718 @util.propertycache
719 def display_id(self):
720 """The public facing "ID" of the revlog that we use in message"""
721 # Maybe we should build a user facing representation of
722 # revlog.target instead of using `self.radix`
723 return self.radix
724
725 def _get_decompressor(self, t):
726 try:
727 compressor = self._decompressors[t]
728 except KeyError:
729 try:
730 engine = util.compengines.forrevlogheader(t)
731 compressor = engine.revlogcompressor(self._compengineopts)
732 self._decompressors[t] = compressor
733 except KeyError:
734 raise error.RevlogError(
735 _(b'unknown compression type %s') % binascii.hexlify(t)
736 )
737 return compressor
738
739 @util.propertycache
740 def _compressor(self):
741 engine = util.compengines[self._compengine]
742 return engine.revlogcompressor(self._compengineopts)
743
744 @util.propertycache
745 def _decompressor(self):
746 """the default decompressor"""
747 if self._docket is None:
748 return None
749 t = self._docket.default_compression_header
750 c = self._get_decompressor(t)
751 return c.decompress
752
753 def _indexfp(self):
754 """file object for the revlog's index file"""
755 return self.opener(self._indexfile, mode=b"r")
756
757 def __index_write_fp(self):
758 # You should not use this directly and use `_writing` instead
759 try:
760 f = self.opener(
761 self._indexfile, mode=b"r+", checkambig=self._checkambig
762 )
763 if self._docket is None:
764 f.seek(0, os.SEEK_END)
765 else:
766 f.seek(self._docket.index_end, os.SEEK_SET)
767 return f
768 except IOError as inst:
769 if inst.errno != errno.ENOENT:
770 raise
771 return self.opener(
772 self._indexfile, mode=b"w+", checkambig=self._checkambig
773 )
774
775 def __index_new_fp(self):
776 # You should not use this unless you are upgrading from inline revlog
777 return self.opener(
778 self._indexfile,
779 mode=b"w",
780 checkambig=self._checkambig,
781 atomictemp=True,
782 )
35 )
783
36
784 def _datafp(self, mode=b'r'):
37 # Rewriting the revlog in place is hard. Our strategy for censoring is
785 """file object for the revlog's data file"""
38 # to create a new revlog, copy all revisions to it, then replace the
786 return self.opener(self._datafile, mode=mode)
39 # revlogs on transaction close.
787
40 #
788 @contextlib.contextmanager
41 # This is a bit dangerous. We could easily have a mismatch of state.
789 def _datareadfp(self, existingfp=None):
42 newrl = revlog.revlog(
790 """file object suitable to read data"""
43 rl.opener,
791 # Use explicit file handle, if given.
44 target=rl.target,
792 if existingfp is not None:
45 radix=rl.radix,
793 yield existingfp
46 postfix=b'tmpcensored',
794
47 censorable=True,
795 # Use a file handle being actively used for writes, if available.
48 )
796 # There is some danger to doing this because reads will seek the
49 newrl._format_version = rl._format_version
797 # file. However, _writeentry() performs a SEEK_END before all writes,
50 newrl._format_flags = rl._format_flags
798 # so we should be safe.
51 newrl._generaldelta = rl._generaldelta
799 elif self._writinghandles:
52 newrl._parse_index = rl._parse_index
800 if self._inline:
801 yield self._writinghandles[0]
802 else:
803 yield self._writinghandles[1]
804
805 # Otherwise open a new file handle.
806 else:
807 if self._inline:
808 func = self._indexfp
809 else:
810 func = self._datafp
811 with func() as fp:
812 yield fp
813
814 @contextlib.contextmanager
815 def _sidedatareadfp(self):
816 """file object suitable to read sidedata"""
817 if self._writinghandles:
818 yield self._writinghandles[2]
819 else:
820 with self.opener(self._sidedatafile) as fp:
821 yield fp
822
823 def tiprev(self):
824 return len(self.index) - 1
825
826 def tip(self):
827 return self.node(self.tiprev())
828
829 def __contains__(self, rev):
830 return 0 <= rev < len(self)
831
832 def __len__(self):
833 return len(self.index)
834
835 def __iter__(self):
836 return iter(pycompat.xrange(len(self)))
837
838 def revs(self, start=0, stop=None):
839 """iterate over all rev in this revlog (from start to stop)"""
840 return storageutil.iterrevs(len(self), start=start, stop=stop)
841
842 @property
843 def nodemap(self):
844 msg = (
845 b"revlog.nodemap is deprecated, "
846 b"use revlog.index.[has_node|rev|get_rev]"
847 )
848 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
849 return self.index.nodemap
850
851 @property
852 def _nodecache(self):
853 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
854 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
855 return self.index.nodemap
856
857 def hasnode(self, node):
858 try:
859 self.rev(node)
860 return True
861 except KeyError:
862 return False
863
864 def candelta(self, baserev, rev):
865 """whether two revisions (baserev, rev) can be delta-ed or not"""
866 # Disable delta if either rev requires a content-changing flag
867 # processor (ex. LFS). This is because such flag processor can alter
868 # the rawtext content that the delta will be based on, and two clients
869 # could have a same revlog node with different flags (i.e. different
870 # rawtext contents) and the delta could be incompatible.
871 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
872 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
873 ):
874 return False
875 return True
876
877 def update_caches(self, transaction):
878 if self._nodemap_file is not None:
879 if transaction is None:
880 nodemaputil.update_persistent_nodemap(self)
881 else:
882 nodemaputil.setup_persistent_nodemap(transaction, self)
883
884 def clearcaches(self):
885 self._revisioncache = None
886 self._chainbasecache.clear()
887 self._chunkcache = (0, b'')
888 self._pcache = {}
889 self._nodemap_docket = None
890 self.index.clearcaches()
891 # The python code is the one responsible for validating the docket, we
892 # end up having to refresh it here.
893 use_nodemap = (
894 not self._inline
895 and self._nodemap_file is not None
896 and util.safehasattr(self.index, 'update_nodemap_data')
897 )
898 if use_nodemap:
899 nodemap_data = nodemaputil.persisted_data(self)
900 if nodemap_data is not None:
901 self._nodemap_docket = nodemap_data[0]
902 self.index.update_nodemap_data(*nodemap_data)
903
904 def rev(self, node):
905 try:
906 return self.index.rev(node)
907 except TypeError:
908 raise
909 except error.RevlogError:
910 # parsers.c radix tree lookup failed
911 if (
912 node == self.nodeconstants.wdirid
913 or node in self.nodeconstants.wdirfilenodeids
914 ):
915 raise error.WdirUnsupported
916 raise error.LookupError(node, self.display_id, _(b'no node'))
917
918 # Accessors for index entries.
919
920 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
921 # are flags.
922 def start(self, rev):
923 return int(self.index[rev][0] >> 16)
924
925 def sidedata_cut_off(self, rev):
926 sd_cut_off = self.index[rev][8]
927 if sd_cut_off != 0:
928 return sd_cut_off
929 # This is some annoying dance, because entries without sidedata
930 # currently use 0 as their ofsset. (instead of previous-offset +
931 # previous-size)
932 #
933 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
934 # In the meantime, we need this.
935 while 0 <= rev:
936 e = self.index[rev]
937 if e[9] != 0:
938 return e[8] + e[9]
939 rev -= 1
940 return 0
941
942 def flags(self, rev):
943 return self.index[rev][0] & 0xFFFF
944
945 def length(self, rev):
946 return self.index[rev][1]
947
948 def sidedata_length(self, rev):
949 if not self.hassidedata:
950 return 0
951 return self.index[rev][9]
952
953 def rawsize(self, rev):
954 """return the length of the uncompressed text for a given revision"""
955 l = self.index[rev][2]
956 if l >= 0:
957 return l
958
959 t = self.rawdata(rev)
960 return len(t)
961
962 def size(self, rev):
963 """length of non-raw text (processed by a "read" flag processor)"""
964 # fast path: if no "read" flag processor could change the content,
965 # size is rawsize. note: ELLIPSIS is known to not change the content.
966 flags = self.flags(rev)
967 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
968 return self.rawsize(rev)
969
970 return len(self.revision(rev, raw=False))
971
972 def chainbase(self, rev):
973 base = self._chainbasecache.get(rev)
974 if base is not None:
975 return base
976
977 index = self.index
978 iterrev = rev
979 base = index[iterrev][3]
980 while base != iterrev:
981 iterrev = base
982 base = index[iterrev][3]
983
984 self._chainbasecache[rev] = base
985 return base
986
987 def linkrev(self, rev):
988 return self.index[rev][4]
989
990 def parentrevs(self, rev):
991 try:
992 entry = self.index[rev]
993 except IndexError:
994 if rev == wdirrev:
995 raise error.WdirUnsupported
996 raise
997 if entry[5] == nullrev:
998 return entry[6], entry[5]
999 else:
1000 return entry[5], entry[6]
1001
1002 # fast parentrevs(rev) where rev isn't filtered
1003 _uncheckedparentrevs = parentrevs
1004
1005 def node(self, rev):
1006 try:
1007 return self.index[rev][7]
1008 except IndexError:
1009 if rev == wdirrev:
1010 raise error.WdirUnsupported
1011 raise
1012
1013 # Derived from index values.
1014
1015 def end(self, rev):
1016 return self.start(rev) + self.length(rev)
1017
1018 def parents(self, node):
1019 i = self.index
1020 d = i[self.rev(node)]
1021 # inline node() to avoid function call overhead
1022 if d[5] == self.nullid:
1023 return i[d[6]][7], i[d[5]][7]
1024 else:
1025 return i[d[5]][7], i[d[6]][7]
1026
1027 def chainlen(self, rev):
1028 return self._chaininfo(rev)[0]
1029
1030 def _chaininfo(self, rev):
1031 chaininfocache = self._chaininfocache
1032 if rev in chaininfocache:
1033 return chaininfocache[rev]
1034 index = self.index
1035 generaldelta = self._generaldelta
1036 iterrev = rev
1037 e = index[iterrev]
1038 clen = 0
1039 compresseddeltalen = 0
1040 while iterrev != e[3]:
1041 clen += 1
1042 compresseddeltalen += e[1]
1043 if generaldelta:
1044 iterrev = e[3]
1045 else:
1046 iterrev -= 1
1047 if iterrev in chaininfocache:
1048 t = chaininfocache[iterrev]
1049 clen += t[0]
1050 compresseddeltalen += t[1]
1051 break
1052 e = index[iterrev]
1053 else:
1054 # Add text length of base since decompressing that also takes
1055 # work. For cache hits the length is already included.
1056 compresseddeltalen += e[1]
1057 r = (clen, compresseddeltalen)
1058 chaininfocache[rev] = r
1059 return r
1060
1061 def _deltachain(self, rev, stoprev=None):
1062 """Obtain the delta chain for a revision.
1063
1064 ``stoprev`` specifies a revision to stop at. If not specified, we
1065 stop at the base of the chain.
1066
1067 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1068 revs in ascending order and ``stopped`` is a bool indicating whether
1069 ``stoprev`` was hit.
1070 """
1071 # Try C implementation.
1072 try:
1073 return self.index.deltachain(rev, stoprev, self._generaldelta)
1074 except AttributeError:
1075 pass
1076
1077 chain = []
1078
1079 # Alias to prevent attribute lookup in tight loop.
1080 index = self.index
1081 generaldelta = self._generaldelta
1082
1083 iterrev = rev
1084 e = index[iterrev]
1085 while iterrev != e[3] and iterrev != stoprev:
1086 chain.append(iterrev)
1087 if generaldelta:
1088 iterrev = e[3]
1089 else:
1090 iterrev -= 1
1091 e = index[iterrev]
1092
1093 if iterrev == stoprev:
1094 stopped = True
1095 else:
1096 chain.append(iterrev)
1097 stopped = False
1098
1099 chain.reverse()
1100 return chain, stopped
1101
1102 def ancestors(self, revs, stoprev=0, inclusive=False):
1103 """Generate the ancestors of 'revs' in reverse revision order.
1104 Does not generate revs lower than stoprev.
1105
1106 See the documentation for ancestor.lazyancestors for more details."""
1107
1108 # first, make sure start revisions aren't filtered
1109 revs = list(revs)
1110 checkrev = self.node
1111 for r in revs:
1112 checkrev(r)
1113 # and we're sure ancestors aren't filtered as well
1114
1115 if rustancestor is not None and self.index.rust_ext_compat:
1116 lazyancestors = rustancestor.LazyAncestors
1117 arg = self.index
1118 else:
1119 lazyancestors = ancestor.lazyancestors
1120 arg = self._uncheckedparentrevs
1121 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1122
1123 def descendants(self, revs):
1124 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1125
1126 def findcommonmissing(self, common=None, heads=None):
1127 """Return a tuple of the ancestors of common and the ancestors of heads
1128 that are not ancestors of common. In revset terminology, we return the
1129 tuple:
1130
1131 ::common, (::heads) - (::common)
1132
1133 The list is sorted by revision number, meaning it is
1134 topologically sorted.
1135
1136 'heads' and 'common' are both lists of node IDs. If heads is
1137 not supplied, uses all of the revlog's heads. If common is not
1138 supplied, uses nullid."""
1139 if common is None:
1140 common = [self.nullid]
1141 if heads is None:
1142 heads = self.heads()
1143
1144 common = [self.rev(n) for n in common]
1145 heads = [self.rev(n) for n in heads]
1146
1147 # we want the ancestors, but inclusive
1148 class lazyset(object):
1149 def __init__(self, lazyvalues):
1150 self.addedvalues = set()
1151 self.lazyvalues = lazyvalues
1152
1153 def __contains__(self, value):
1154 return value in self.addedvalues or value in self.lazyvalues
1155
1156 def __iter__(self):
1157 added = self.addedvalues
1158 for r in added:
1159 yield r
1160 for r in self.lazyvalues:
1161 if not r in added:
1162 yield r
1163
1164 def add(self, value):
1165 self.addedvalues.add(value)
1166
1167 def update(self, values):
1168 self.addedvalues.update(values)
1169
1170 has = lazyset(self.ancestors(common))
1171 has.add(nullrev)
1172 has.update(common)
1173
1174 # take all ancestors from heads that aren't in has
1175 missing = set()
1176 visit = collections.deque(r for r in heads if r not in has)
1177 while visit:
1178 r = visit.popleft()
1179 if r in missing:
1180 continue
1181 else:
1182 missing.add(r)
1183 for p in self.parentrevs(r):
1184 if p not in has:
1185 visit.append(p)
1186 missing = list(missing)
1187 missing.sort()
1188 return has, [self.node(miss) for miss in missing]
1189
1190 def incrementalmissingrevs(self, common=None):
1191 """Return an object that can be used to incrementally compute the
1192 revision numbers of the ancestors of arbitrary sets that are not
1193 ancestors of common. This is an ancestor.incrementalmissingancestors
1194 object.
1195
53
1196 'common' is a list of revision numbers. If common is not supplied, uses
54 for rev in rl.revs():
1197 nullrev.
55 node = rl.node(rev)
1198 """
56 p1, p2 = rl.parents(node)
1199 if common is None:
1200 common = [nullrev]
1201
1202 if rustancestor is not None and self.index.rust_ext_compat:
1203 return rustancestor.MissingAncestors(self.index, common)
1204 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1205
1206 def findmissingrevs(self, common=None, heads=None):
1207 """Return the revision numbers of the ancestors of heads that
1208 are not ancestors of common.
1209
1210 More specifically, return a list of revision numbers corresponding to
1211 nodes N such that every N satisfies the following constraints:
1212
1213 1. N is an ancestor of some node in 'heads'
1214 2. N is not an ancestor of any node in 'common'
1215
1216 The list is sorted by revision number, meaning it is
1217 topologically sorted.
1218
1219 'heads' and 'common' are both lists of revision numbers. If heads is
1220 not supplied, uses all of the revlog's heads. If common is not
1221 supplied, uses nullid."""
1222 if common is None:
1223 common = [nullrev]
1224 if heads is None:
1225 heads = self.headrevs()
1226
1227 inc = self.incrementalmissingrevs(common=common)
1228 return inc.missingancestors(heads)
1229
1230 def findmissing(self, common=None, heads=None):
1231 """Return the ancestors of heads that are not ancestors of common.
1232
1233 More specifically, return a list of nodes N such that every N
1234 satisfies the following constraints:
1235
1236 1. N is an ancestor of some node in 'heads'
1237 2. N is not an ancestor of any node in 'common'
1238
1239 The list is sorted by revision number, meaning it is
1240 topologically sorted.
1241
1242 'heads' and 'common' are both lists of node IDs. If heads is
1243 not supplied, uses all of the revlog's heads. If common is not
1244 supplied, uses nullid."""
1245 if common is None:
1246 common = [self.nullid]
1247 if heads is None:
1248 heads = self.heads()
1249
1250 common = [self.rev(n) for n in common]
1251 heads = [self.rev(n) for n in heads]
1252
1253 inc = self.incrementalmissingrevs(common=common)
1254 return [self.node(r) for r in inc.missingancestors(heads)]
1255
1256 def nodesbetween(self, roots=None, heads=None):
1257 """Return a topological path from 'roots' to 'heads'.
1258
1259 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1260 topologically sorted list of all nodes N that satisfy both of
1261 these constraints:
1262
1263 1. N is a descendant of some node in 'roots'
1264 2. N is an ancestor of some node in 'heads'
1265
1266 Every node is considered to be both a descendant and an ancestor
1267 of itself, so every reachable node in 'roots' and 'heads' will be
1268 included in 'nodes'.
1269
1270 'outroots' is the list of reachable nodes in 'roots', i.e., the
1271 subset of 'roots' that is returned in 'nodes'. Likewise,
1272 'outheads' is the subset of 'heads' that is also in 'nodes'.
1273
1274 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1275 unspecified, uses nullid as the only root. If 'heads' is
1276 unspecified, uses list of all of the revlog's heads."""
1277 nonodes = ([], [], [])
1278 if roots is not None:
1279 roots = list(roots)
1280 if not roots:
1281 return nonodes
1282 lowestrev = min([self.rev(n) for n in roots])
1283 else:
1284 roots = [self.nullid] # Everybody's a descendant of nullid
1285 lowestrev = nullrev
1286 if (lowestrev == nullrev) and (heads is None):
1287 # We want _all_ the nodes!
1288 return (
1289 [self.node(r) for r in self],
1290 [self.nullid],
1291 list(self.heads()),
1292 )
1293 if heads is None:
1294 # All nodes are ancestors, so the latest ancestor is the last
1295 # node.
1296 highestrev = len(self) - 1
1297 # Set ancestors to None to signal that every node is an ancestor.
1298 ancestors = None
1299 # Set heads to an empty dictionary for later discovery of heads
1300 heads = {}
1301 else:
1302 heads = list(heads)
1303 if not heads:
1304 return nonodes
1305 ancestors = set()
1306 # Turn heads into a dictionary so we can remove 'fake' heads.
1307 # Also, later we will be using it to filter out the heads we can't
1308 # find from roots.
1309 heads = dict.fromkeys(heads, False)
1310 # Start at the top and keep marking parents until we're done.
1311 nodestotag = set(heads)
1312 # Remember where the top was so we can use it as a limit later.
1313 highestrev = max([self.rev(n) for n in nodestotag])
1314 while nodestotag:
1315 # grab a node to tag
1316 n = nodestotag.pop()
1317 # Never tag nullid
1318 if n == self.nullid:
1319 continue
1320 # A node's revision number represents its place in a
1321 # topologically sorted list of nodes.
1322 r = self.rev(n)
1323 if r >= lowestrev:
1324 if n not in ancestors:
1325 # If we are possibly a descendant of one of the roots
1326 # and we haven't already been marked as an ancestor
1327 ancestors.add(n) # Mark as ancestor
1328 # Add non-nullid parents to list of nodes to tag.
1329 nodestotag.update(
1330 [p for p in self.parents(n) if p != self.nullid]
1331 )
1332 elif n in heads: # We've seen it before, is it a fake head?
1333 # So it is, real heads should not be the ancestors of
1334 # any other heads.
1335 heads.pop(n)
1336 if not ancestors:
1337 return nonodes
1338 # Now that we have our set of ancestors, we want to remove any
1339 # roots that are not ancestors.
1340
1341 # If one of the roots was nullid, everything is included anyway.
1342 if lowestrev > nullrev:
1343 # But, since we weren't, let's recompute the lowest rev to not
1344 # include roots that aren't ancestors.
1345
57
1346 # Filter out roots that aren't ancestors of heads
58 if rev == censorrev:
1347 roots = [root for root in roots if root in ancestors]
59 newrl.addrawrevision(
1348 # Recompute the lowest revision
60 tombstone,
1349 if roots:
61 tr,
1350 lowestrev = min([self.rev(root) for root in roots])
62 rl.linkrev(censorrev),
1351 else:
63 p1,
1352 # No more roots? Return empty list
64 p2,
1353 return nonodes
65 censornode,
1354 else:
66 constants.REVIDX_ISCENSORED,
1355 # We are descending from nullid, and don't need to care about
1356 # any other roots.
1357 lowestrev = nullrev
1358 roots = [self.nullid]
1359 # Transform our roots list into a set.
1360 descendants = set(roots)
1361 # Also, keep the original roots so we can filter out roots that aren't
1362 # 'real' roots (i.e. are descended from other roots).
1363 roots = descendants.copy()
1364 # Our topologically sorted list of output nodes.
1365 orderedout = []
1366 # Don't start at nullid since we don't want nullid in our output list,
1367 # and if nullid shows up in descendants, empty parents will look like
1368 # they're descendants.
1369 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1370 n = self.node(r)
1371 isdescendant = False
1372 if lowestrev == nullrev: # Everybody is a descendant of nullid
1373 isdescendant = True
1374 elif n in descendants:
1375 # n is already a descendant
1376 isdescendant = True
1377 # This check only needs to be done here because all the roots
1378 # will start being marked is descendants before the loop.
1379 if n in roots:
1380 # If n was a root, check if it's a 'real' root.
1381 p = tuple(self.parents(n))
1382 # If any of its parents are descendants, it's not a root.
1383 if (p[0] in descendants) or (p[1] in descendants):
1384 roots.remove(n)
1385 else:
1386 p = tuple(self.parents(n))
1387 # A node is a descendant if either of its parents are
1388 # descendants. (We seeded the dependents list with the roots
1389 # up there, remember?)
1390 if (p[0] in descendants) or (p[1] in descendants):
1391 descendants.add(n)
1392 isdescendant = True
1393 if isdescendant and ((ancestors is None) or (n in ancestors)):
1394 # Only include nodes that are both descendants and ancestors.
1395 orderedout.append(n)
1396 if (ancestors is not None) and (n in heads):
1397 # We're trying to figure out which heads are reachable
1398 # from roots.
1399 # Mark this head as having been reached
1400 heads[n] = True
1401 elif ancestors is None:
1402 # Otherwise, we're trying to discover the heads.
1403 # Assume this is a head because if it isn't, the next step
1404 # will eventually remove it.
1405 heads[n] = True
1406 # But, obviously its parents aren't.
1407 for p in self.parents(n):
1408 heads.pop(p, None)
1409 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1410 roots = list(roots)
1411 assert orderedout
1412 assert roots
1413 assert heads
1414 return (orderedout, roots, heads)
1415
1416 def headrevs(self, revs=None):
1417 if revs is None:
1418 try:
1419 return self.index.headrevs()
1420 except AttributeError:
1421 return self._headrevs()
1422 if rustdagop is not None and self.index.rust_ext_compat:
1423 return rustdagop.headrevs(self.index, revs)
1424 return dagop.headrevs(revs, self._uncheckedparentrevs)
1425
1426 def computephases(self, roots):
1427 return self.index.computephasesmapsets(roots)
1428
1429 def _headrevs(self):
1430 count = len(self)
1431 if not count:
1432 return [nullrev]
1433 # we won't iter over filtered rev so nobody is a head at start
1434 ishead = [0] * (count + 1)
1435 index = self.index
1436 for r in self:
1437 ishead[r] = 1 # I may be an head
1438 e = index[r]
1439 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1440 return [r for r, val in enumerate(ishead) if val]
1441
1442 def heads(self, start=None, stop=None):
1443 """return the list of all nodes that have no children
1444
1445 if start is specified, only heads that are descendants of
1446 start will be returned
1447 if stop is specified, it will consider all the revs from stop
1448 as if they had no children
1449 """
1450 if start is None and stop is None:
1451 if not len(self):
1452 return [self.nullid]
1453 return [self.node(r) for r in self.headrevs()]
1454
1455 if start is None:
1456 start = nullrev
1457 else:
1458 start = self.rev(start)
1459
1460 stoprevs = {self.rev(n) for n in stop or []}
1461
1462 revs = dagop.headrevssubset(
1463 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1464 )
1465
1466 return [self.node(rev) for rev in revs]
1467
1468 def children(self, node):
1469 """find the children of a given node"""
1470 c = []
1471 p = self.rev(node)
1472 for r in self.revs(start=p + 1):
1473 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1474 if prevs:
1475 for pr in prevs:
1476 if pr == p:
1477 c.append(self.node(r))
1478 elif p == nullrev:
1479 c.append(self.node(r))
1480 return c
1481
1482 def commonancestorsheads(self, a, b):
1483 """calculate all the heads of the common ancestors of nodes a and b"""
1484 a, b = self.rev(a), self.rev(b)
1485 ancs = self._commonancestorsheads(a, b)
1486 return pycompat.maplist(self.node, ancs)
1487
1488 def _commonancestorsheads(self, *revs):
1489 """calculate all the heads of the common ancestors of revs"""
1490 try:
1491 ancs = self.index.commonancestorsheads(*revs)
1492 except (AttributeError, OverflowError): # C implementation failed
1493 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1494 return ancs
1495
1496 def isancestor(self, a, b):
1497 """return True if node a is an ancestor of node b
1498
1499 A revision is considered an ancestor of itself."""
1500 a, b = self.rev(a), self.rev(b)
1501 return self.isancestorrev(a, b)
1502
1503 def isancestorrev(self, a, b):
1504 """return True if revision a is an ancestor of revision b
1505
1506 A revision is considered an ancestor of itself.
1507
1508 The implementation of this is trivial but the use of
1509 reachableroots is not."""
1510 if a == nullrev:
1511 return True
1512 elif a == b:
1513 return True
1514 elif a > b:
1515 return False
1516 return bool(self.reachableroots(a, [b], [a], includepath=False))
1517
1518 def reachableroots(self, minroot, heads, roots, includepath=False):
1519 """return (heads(::(<roots> and <roots>::<heads>)))
1520
1521 If includepath is True, return (<roots>::<heads>)."""
1522 try:
1523 return self.index.reachableroots2(
1524 minroot, heads, roots, includepath
1525 )
1526 except AttributeError:
1527 return dagop._reachablerootspure(
1528 self.parentrevs, minroot, roots, heads, includepath
1529 )
1530
1531 def ancestor(self, a, b):
1532 """calculate the "best" common ancestor of nodes a and b"""
1533
1534 a, b = self.rev(a), self.rev(b)
1535 try:
1536 ancs = self.index.ancestors(a, b)
1537 except (AttributeError, OverflowError):
1538 ancs = ancestor.ancestors(self.parentrevs, a, b)
1539 if ancs:
1540 # choose a consistent winner when there's a tie
1541 return min(map(self.node, ancs))
1542 return self.nullid
1543
1544 def _match(self, id):
1545 if isinstance(id, int):
1546 # rev
1547 return self.node(id)
1548 if len(id) == self.nodeconstants.nodelen:
1549 # possibly a binary node
1550 # odds of a binary node being all hex in ASCII are 1 in 10**25
1551 try:
1552 node = id
1553 self.rev(node) # quick search the index
1554 return node
1555 except error.LookupError:
1556 pass # may be partial hex id
1557 try:
1558 # str(rev)
1559 rev = int(id)
1560 if b"%d" % rev != id:
1561 raise ValueError
1562 if rev < 0:
1563 rev = len(self) + rev
1564 if rev < 0 or rev >= len(self):
1565 raise ValueError
1566 return self.node(rev)
1567 except (ValueError, OverflowError):
1568 pass
1569 if len(id) == 2 * self.nodeconstants.nodelen:
1570 try:
1571 # a full hex nodeid?
1572 node = bin(id)
1573 self.rev(node)
1574 return node
1575 except (TypeError, error.LookupError):
1576 pass
1577
1578 def _partialmatch(self, id):
1579 # we don't care wdirfilenodeids as they should be always full hash
1580 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1581 ambiguous = False
1582 try:
1583 partial = self.index.partialmatch(id)
1584 if partial and self.hasnode(partial):
1585 if maybewdir:
1586 # single 'ff...' match in radix tree, ambiguous with wdir
1587 ambiguous = True
1588 else:
1589 return partial
1590 elif maybewdir:
1591 # no 'ff...' match in radix tree, wdir identified
1592 raise error.WdirUnsupported
1593 else:
1594 return None
1595 except error.RevlogError:
1596 # parsers.c radix tree lookup gave multiple matches
1597 # fast path: for unfiltered changelog, radix tree is accurate
1598 if not getattr(self, 'filteredrevs', None):
1599 ambiguous = True
1600 # fall through to slow path that filters hidden revisions
1601 except (AttributeError, ValueError):
1602 # we are pure python, or key was too short to search radix tree
1603 pass
1604 if ambiguous:
1605 raise error.AmbiguousPrefixLookupError(
1606 id, self.display_id, _(b'ambiguous identifier')
1607 )
67 )
1608
68
1609 if id in self._pcache:
69 if newrl.deltaparent(rev) != nullrev:
1610 return self._pcache[id]
70 m = _(b'censored revision stored as delta; cannot censor')
1611
71 h = _(
1612 if len(id) <= 40:
72 b'censoring of revlogs is not fully implemented;'
1613 try:
73 b' please report this bug'
1614 # hex(node)[:...]
74 )
1615 l = len(id) // 2 # grab an even number of digits
75 raise error.Abort(m, hint=h)
1616 prefix = bin(id[: l * 2])
76 continue
1617 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1618 nl = [
1619 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1620 ]
1621 if self.nodeconstants.nullhex.startswith(id):
1622 nl.append(self.nullid)
1623 if len(nl) > 0:
1624 if len(nl) == 1 and not maybewdir:
1625 self._pcache[id] = nl[0]
1626 return nl[0]
1627 raise error.AmbiguousPrefixLookupError(
1628 id, self.display_id, _(b'ambiguous identifier')
1629 )
1630 if maybewdir:
1631 raise error.WdirUnsupported
1632 return None
1633 except TypeError:
1634 pass
1635
1636 def lookup(self, id):
1637 """locate a node based on:
1638 - revision number or str(revision number)
1639 - nodeid or subset of hex nodeid
1640 """
1641 n = self._match(id)
1642 if n is not None:
1643 return n
1644 n = self._partialmatch(id)
1645 if n:
1646 return n
1647
1648 raise error.LookupError(id, self.display_id, _(b'no match found'))
1649
1650 def shortest(self, node, minlength=1):
1651 """Find the shortest unambiguous prefix that matches node."""
1652
1653 def isvalid(prefix):
1654 try:
1655 matchednode = self._partialmatch(prefix)
1656 except error.AmbiguousPrefixLookupError:
1657 return False
1658 except error.WdirUnsupported:
1659 # single 'ff...' match
1660 return True
1661 if matchednode is None:
1662 raise error.LookupError(node, self.display_id, _(b'no node'))
1663 return True
1664
1665 def maybewdir(prefix):
1666 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1667
1668 hexnode = hex(node)
1669
1670 def disambiguate(hexnode, minlength):
1671 """Disambiguate against wdirid."""
1672 for length in range(minlength, len(hexnode) + 1):
1673 prefix = hexnode[:length]
1674 if not maybewdir(prefix):
1675 return prefix
1676
1677 if not getattr(self, 'filteredrevs', None):
1678 try:
1679 length = max(self.index.shortest(node), minlength)
1680 return disambiguate(hexnode, length)
1681 except error.RevlogError:
1682 if node != self.nodeconstants.wdirid:
1683 raise error.LookupError(
1684 node, self.display_id, _(b'no node')
1685 )
1686 except AttributeError:
1687 # Fall through to pure code
1688 pass
1689
1690 if node == self.nodeconstants.wdirid:
1691 for length in range(minlength, len(hexnode) + 1):
1692 prefix = hexnode[:length]
1693 if isvalid(prefix):
1694 return prefix
1695
1696 for length in range(minlength, len(hexnode) + 1):
1697 prefix = hexnode[:length]
1698 if isvalid(prefix):
1699 return disambiguate(hexnode, length)
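# --- illustrative sketch, not part of revlog.py -----------------------
# A self-contained toy version of the prefix search in shortest():
# grow the hex prefix until it matches exactly one known node.
def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in hexnodes if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

# toy_shortest(['ab12', 'ab34', 'cd56'], 'ab12') == 'ab1'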
1700
1701 def cmp(self, node, text):
1702 """compare text with a given file revision
1703
1704 returns True if text is different than what is stored.
1705 """
1706 p1, p2 = self.parents(node)
1707 return storageutil.hashrevisionsha1(text, p1, p2) != node
1708
1709 def _cachesegment(self, offset, data):
1710 """Add a segment to the revlog cache.
1711
1712 Accepts an absolute offset and the data that is at that location.
1713 """
1714 o, d = self._chunkcache
1715 # try to add to existing cache
1716 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1717 self._chunkcache = o, d + data
1718 else:
1719 self._chunkcache = offset, data
1720
1721 def _readsegment(self, offset, length, df=None):
1722 """Load a segment of raw data from the revlog.
1723
1724 Accepts an absolute offset, length to read, and an optional existing
1725 file handle to read from.
1726
1727 If an existing file handle is passed, it will be seeked and the
1728 original seek position will NOT be restored.
1729
1730 Returns a str or buffer of raw byte data.
1731
1732 Raises if the requested number of bytes could not be read.
1733 """
1734 # Cache data both forward and backward around the requested
1735 # data, in a fixed size window. This helps speed up operations
1736 # involving reading the revlog backwards.
1737 cachesize = self._chunkcachesize
1738 realoffset = offset & ~(cachesize - 1)
1739 reallength = (
1740 (offset + length + cachesize) & ~(cachesize - 1)
1741 ) - realoffset
1742 with self._datareadfp(df) as df:
1743 df.seek(realoffset)
1744 d = df.read(reallength)
1745
1746 self._cachesegment(realoffset, d)
1747 if offset != realoffset or reallength != length:
1748 startoffset = offset - realoffset
1749 if len(d) - startoffset < length:
1750 filename = self._indexfile if self._inline else self._datafile
1751 got = len(d) - startoffset
1752 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1753 raise error.RevlogError(m)
1754 return util.buffer(d, startoffset, length)
1755
1756 if len(d) < length:
1757 filename = self._indexfile if self._inline else self._datafile
1758 got = len(d) - startoffset
1759 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1760 raise error.RevlogError(m)
1761
1762 return d
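# --- illustrative sketch, not part of revlog.py -----------------------
# The window arithmetic above, in isolation: round the requested range
# out to multiples of the cache size (assumed to be a power of two).
def aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# aligned_window(70000, 100) == (65536, 65536): the 100 requested bytes
# are then sliced back out of the window at offset 70000 - 65536.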
1763
1764 def _getsegment(self, offset, length, df=None):
1765 """Obtain a segment of raw data from the revlog.
1766
1767 Accepts an absolute offset, length of bytes to obtain, and an
1768 optional file handle to the already-opened revlog. If the file
1769 handle is used, its original seek position will not be preserved.
1770
1771 Requests for data may be returned from a cache.
1772
1773 Returns a str or a buffer instance of raw byte data.
1774 """
1775 o, d = self._chunkcache
1776 l = len(d)
1777
1778 # is it in the cache?
1779 cachestart = offset - o
1780 cacheend = cachestart + length
1781 if cachestart >= 0 and cacheend <= l:
1782 if cachestart == 0 and cacheend == l:
1783 return d # avoid a copy
1784 return util.buffer(d, cachestart, cacheend - cachestart)
1785
1786 return self._readsegment(offset, length, df=df)
1787
1788 def _getsegmentforrevs(self, startrev, endrev, df=None):
1789 """Obtain a segment of raw data corresponding to a range of revisions.
1790
1791 Accepts the start and end revisions and an optional already-open
1792 file handle to be used for reading. If the file handle is read, its
1793 seek position will not be preserved.
1794
1795 Requests for data may be satisfied by a cache.
1796
1797 Returns a 2-tuple of (offset, data) for the requested range of
1798 revisions. Offset is the integer offset from the beginning of the
1799 revlog and data is a str or buffer of the raw byte data.
1800
1801 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1802 to determine where each revision's data begins and ends.
1803 """
1804 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1805 # (functions are expensive).
1806 index = self.index
1807 istart = index[startrev]
1808 start = int(istart[0] >> 16)
1809 if startrev == endrev:
1810 end = start + istart[1]
1811 else:
1812 iend = index[endrev]
1813 end = int(iend[0] >> 16) + iend[1]
1814
1815 if self._inline:
1816 start += (startrev + 1) * self.index.entry_size
1817 end += (endrev + 1) * self.index.entry_size
1818 length = end - start
1819
1820 return start, self._getsegment(start, length, df=df)
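# --- illustrative sketch, not part of revlog.py -----------------------
# The inline adjustment above, in isolation: in an inline revlog each
# revision's data is preceded by its index entry, so a data offset must
# be shifted by one entry per revision seen so far. The 64-byte entry
# size here is an assumption for the example (the v1 index entry size).
def inline_start(data_start, rev, entry_size=64):
    return data_start + (rev + 1) * entry_size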
1821
1822 def _chunk(self, rev, df=None):
1823 """Obtain a single decompressed chunk for a revision.
1824
1825 Accepts an integer revision and an optional already-open file handle
1826 to be used for reading. If used, the seek position of the file will not
1827 be preserved.
1828
1829 Returns a str holding uncompressed data for the requested revision.
1830 """
1831 compression_mode = self.index[rev][10]
1832 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1833 if compression_mode == COMP_MODE_PLAIN:
1834 return data
1835 elif compression_mode == COMP_MODE_DEFAULT:
1836 return self._decompressor(data)
1837 elif compression_mode == COMP_MODE_INLINE:
1838 return self.decompress(data)
1839 else:
1840 msg = 'unknown compression mode %d'
1841 msg %= compression_mode
1842 raise error.RevlogError(msg)
1843
1844 def _chunks(self, revs, df=None, targetsize=None):
1845 """Obtain decompressed chunks for the specified revisions.
1846
1847 Accepts an iterable of numeric revisions that are assumed to be in
1848 ascending order. Also accepts an optional already-open file handle
1849 to be used for reading. If used, the seek position of the file will
1850 not be preserved.
1851
1852 This function is similar to calling ``self._chunk()`` multiple times,
1853 but is faster.
1854
1855 Returns a list with decompressed data for each requested revision.
1856 """
1857 if not revs:
1858 return []
1859 start = self.start
1860 length = self.length
1861 inline = self._inline
1862 iosize = self.index.entry_size
1863 buffer = util.buffer
1864
1865 l = []
1866 ladd = l.append
1867
1868 if not self._withsparseread:
1869 slicedchunks = (revs,)
1870 else:
1871 slicedchunks = deltautil.slicechunk(
1872 self, revs, targetsize=targetsize
1873 )
1874
1875 for revschunk in slicedchunks:
1876 firstrev = revschunk[0]
1877 # Skip trailing revisions with empty diff
1878 for lastrev in revschunk[::-1]:
1879 if length(lastrev) != 0:
1880 break
1881
1882 try:
1883 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1884 except OverflowError:
1885 # issue4215 - we can't cache a run of chunks greater than
1886 # 2G on Windows
1887 return [self._chunk(rev, df=df) for rev in revschunk]
1888
1889 decomp = self.decompress
1890 # self._decompressor might be None, but will not be used in that case
1891 def_decomp = self._decompressor
1892 for rev in revschunk:
1893 chunkstart = start(rev)
1894 if inline:
1895 chunkstart += (rev + 1) * iosize
1896 chunklength = length(rev)
1897 comp_mode = self.index[rev][10]
1898 c = buffer(data, chunkstart - offset, chunklength)
1899 if comp_mode == COMP_MODE_PLAIN:
1900 ladd(c)
1901 elif comp_mode == COMP_MODE_INLINE:
1902 ladd(decomp(c))
1903 elif comp_mode == COMP_MODE_DEFAULT:
1904 ladd(def_decomp(c))
1905 else:
1906 msg = 'unknown compression mode %d'
1907 msg %= comp_mode
1908 raise error.RevlogError(msg)
1909
1910 return l
1911
1912 def _chunkclear(self):
1913 """Clear the raw chunk cache."""
1914 self._chunkcache = (0, b'')
1915
1916 def deltaparent(self, rev):
1917 """return deltaparent of the given revision"""
1918 base = self.index[rev][3]
1919 if base == rev:
1920 return nullrev
1921 elif self._generaldelta:
1922 return base
1923 else:
1924 return rev - 1
1925
1926 def issnapshot(self, rev):
1927 """tells whether rev is a snapshot"""
1928 if not self._sparserevlog:
1929 return self.deltaparent(rev) == nullrev
1930 elif util.safehasattr(self.index, b'issnapshot'):
1931 # directly assign the method to cache the testing and access
1932 self.issnapshot = self.index.issnapshot
1933 return self.issnapshot(rev)
1934 if rev == nullrev:
1935 return True
1936 entry = self.index[rev]
1937 base = entry[3]
1938 if base == rev:
1939 return True
1940 if base == nullrev:
1941 return True
1942 p1 = entry[5]
1943 p2 = entry[6]
1944 if base == p1 or base == p2:
1945 return False
1946 return self.issnapshot(base)
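# --- illustrative sketch, not part of revlog.py -----------------------
# The pure-python fallback above, over a toy index: base[r] is the
# delta base of r (base[r] == r meaning "full text stored"), p1/p2 map
# each revision to its parent revisions, and -1 stands for nullrev.
def toy_issnapshot(base, p1, p2, rev):
    if rev == -1 or base[rev] == rev or base[rev] == -1:
        return True                      # full text, or delta against null
    if base[rev] in (p1[rev], p2[rev]):
        return False                     # plain delta against a parent
    return toy_issnapshot(base, p1, p2, base[rev])  # intermediate snapshot?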
1947
1948 def snapshotdepth(self, rev):
1949 """number of snapshot in the chain before this one"""
1950 if not self.issnapshot(rev):
1951 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1952 return len(self._deltachain(rev)[0]) - 1
1953
1954 def revdiff(self, rev1, rev2):
1955 """return or calculate a delta between two revisions
1956
1957 The delta calculated is in binary form and is intended to be written to
1958 revlog data directly. So this function needs raw revision data.
1959 """
1960 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1961 return bytes(self._chunk(rev2))
1962
1963 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1964
1965 def _processflags(self, text, flags, operation, raw=False):
1966 """deprecated entry point to access flag processors"""
1967 msg = b'_processflag(...) use the specialized variant'
1968 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 if raw:
1970 return text, flagutil.processflagsraw(self, text, flags)
1971 elif operation == b'read':
1972 return flagutil.processflagsread(self, text, flags)
1973 else: # write operation
1974 return flagutil.processflagswrite(self, text, flags)
1975
1976 def revision(self, nodeorrev, _df=None, raw=False):
1977 """return an uncompressed revision of a given node or revision
1978 number.
1979
1980 _df - an existing file handle to read from. (internal-only)
1981 raw - an optional argument specifying if the revision data is to be
1982 treated as raw data when applying flag transforms. 'raw' should be set
1983 to True when generating changegroups or in debug commands.
1984 """
1985 if raw:
1986 msg = (
1987 b'revlog.revision(..., raw=True) is deprecated, '
1988 b'use revlog.rawdata(...)'
1989 )
1990 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1991 return self._revisiondata(nodeorrev, _df, raw=raw)
1992
1993 def sidedata(self, nodeorrev, _df=None):
1994 """a map of extra data related to the changeset but not part of the hash
1995
1996 This function currently returns a dictionary. However, a more advanced
1997 mapping object will likely be used in the future for more
1998 efficient/lazy code.
1999 """
2000 # deal with <nodeorrev> argument type
2001 if isinstance(nodeorrev, int):
2002 rev = nodeorrev
2003 else:
2004 rev = self.rev(nodeorrev)
2005 return self._sidedata(rev)
2006
77
78 if rl.iscensored(rev):
79 if rl.deltaparent(rev) != nullrev:
80 m = _(
81 b'cannot censor due to censored '
82 b'revision having delta stored'
83 )
84 raise error.Abort(m)
85 rawtext = rl._chunk(rev)
86 else:
87 rawtext = rl.rawdata(rev)
2007 def _revisiondata(self, nodeorrev, _df=None, raw=False):
2008 # deal with <nodeorrev> argument type
2009 if isinstance(nodeorrev, int):
2010 rev = nodeorrev
2011 node = self.node(rev)
2012 else:
2013 node = nodeorrev
2014 rev = None
2015
2016 # fast path the special `nullid` rev
2017 if node == self.nullid:
2018 return b""
2019
2020 # ``rawtext`` is the text as stored inside the revlog. Might be the
2021 # revision or might need to be processed to retrieve the revision.
2022 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2023
2024 if raw and validated:
2025 # if we don't want to process the raw text and that raw
2026 # text is cached, we can exit early.
2027 return rawtext
2028 if rev is None:
2029 rev = self.rev(node)
2030 # the revlog's flag for this revision
2031 # (usually alter its state or content)
2032 flags = self.flags(rev)
2033
2034 if validated and flags == REVIDX_DEFAULT_FLAGS:
2035 # no extra flags set, no flag processor runs, text = rawtext
2036 return rawtext
2037
2038 if raw:
2039 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2040 text = rawtext
2041 else:
2042 r = flagutil.processflagsread(self, rawtext, flags)
2043 text, validatehash = r
2044 if validatehash:
2045 self.checkhash(text, node, rev=rev)
2046 if not validated:
2047 self._revisioncache = (node, rev, rawtext)
2048
2049 return text
2050
2051 def _rawtext(self, node, rev, _df=None):
2052 """return the possibly unvalidated rawtext for a revision
2053
2054 returns (rev, rawtext, validated)
2055 """
2056
2057 # revision in the cache (could be useful to apply delta)
2058 cachedrev = None
2059 # An intermediate text to apply deltas to
2060 basetext = None
2061
2062 # Check if we have the entry in cache
2063 # The cache entry looks like (node, rev, rawtext)
2064 if self._revisioncache:
2065 if self._revisioncache[0] == node:
2066 return (rev, self._revisioncache[2], True)
2067 cachedrev = self._revisioncache[1]
2068
2069 if rev is None:
2070 rev = self.rev(node)
2071
2072 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2073 if stopped:
2074 basetext = self._revisioncache[2]
2075
2076 # drop cache to save memory, the caller is expected to
2077 # update self._revisioncache after validating the text
2078 self._revisioncache = None
2079
2080 targetsize = None
2081 rawsize = self.index[rev][2]
2082 if 0 <= rawsize:
2083 targetsize = 4 * rawsize
2084
2085 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2086 if basetext is None:
2087 basetext = bytes(bins[0])
2088 bins = bins[1:]
2089
2090 rawtext = mdiff.patches(basetext, bins)
2091 del basetext # let us have a chance to free memory early
2092 return (rev, rawtext, False)
2093
2094 def _sidedata(self, rev):
2095 """Return the sidedata for a given revision number."""
2096 index_entry = self.index[rev]
2097 sidedata_offset = index_entry[8]
2098 sidedata_size = index_entry[9]
2099
2100 if self._inline:
2101 sidedata_offset += self.index.entry_size * (1 + rev)
2102 if sidedata_size == 0:
2103 return {}
2104
2105 # XXX this needs caching, as we do for data
2106 with self._sidedatareadfp() as sdf:
2107 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2108 filename = self._sidedatafile
2109 end = self._docket.sidedata_end
2110 offset = sidedata_offset
2111 length = sidedata_size
2112 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2113 raise error.RevlogError(m)
2114
2115 sdf.seek(sidedata_offset, os.SEEK_SET)
2116 comp_segment = sdf.read(sidedata_size)
2117
2118 if len(comp_segment) < sidedata_size:
2119 filename = self._sidedatafile
2120 length = sidedata_size
2121 offset = sidedata_offset
2122 got = len(comp_segment)
2123 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2124 raise error.RevlogError(m)
2125
2126 comp = self.index[rev][11]
2127 if comp == COMP_MODE_PLAIN:
2128 segment = comp_segment
2129 elif comp == COMP_MODE_DEFAULT:
2130 segment = self._decompressor(comp_segment)
2131 elif comp == COMP_MODE_INLINE:
2132 segment = self.decompress(comp_segment)
2133 else:
2134 msg = 'unknown compression mode %d'
2135 msg %= comp
2136 raise error.RevlogError(msg)
2137
2138 sidedata = sidedatautil.deserialize_sidedata(segment)
2139 return sidedata
2140
2141 def rawdata(self, nodeorrev, _df=None):
2142 """return an uncompressed raw data of a given node or revision number.
2143
2144 _df - an existing file handle to read from. (internal-only)
2145 """
2146 return self._revisiondata(nodeorrev, _df, raw=True)
2147
2148 def hash(self, text, p1, p2):
2149 """Compute a node hash.
2150
2151 Available as a function so that subclasses can replace the hash
2152 as needed.
2153 """
2154 return storageutil.hashrevisionsha1(text, p1, p2)
2155
2156 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2157 """Check node hash integrity.
2158
2159 Available as a function so that subclasses can extend hash mismatch
2160 behaviors as needed.
2161 """
2162 try:
2163 if p1 is None and p2 is None:
2164 p1, p2 = self.parents(node)
2165 if node != self.hash(text, p1, p2):
2166 # Clear the revision cache on hash failure. The revision cache
2167 # only stores the raw revision and clearing the cache does have
2168 # the side-effect that we won't have a cache hit when the raw
2169 # revision data is accessed. But this case should be rare and
2170 # it is extra work to teach the cache about the hash
2171 # verification state.
2172 if self._revisioncache and self._revisioncache[0] == node:
2173 self._revisioncache = None
2174
2175 revornode = rev
2176 if revornode is None:
2177 revornode = templatefilters.short(hex(node))
2178 raise error.RevlogError(
2179 _(b"integrity check failed on %s:%s")
2180 % (self.display_id, pycompat.bytestr(revornode))
2181 )
2182 except error.RevlogError:
2183 if self._censorable and storageutil.iscensoredtext(text):
2184 raise error.CensoredNodeError(self.display_id, node, text)
2185 raise
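# --- illustrative sketch, not part of revlog.py -----------------------
# The hash being checked is storageutil.hashrevisionsha1: SHA-1 over
# the two parent nodes in sorted order, then the text. A minimal
# reimplementation for illustration:
import hashlib

def toy_node_hash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()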
2186
2187 def _enforceinlinesize(self, tr):
2188 """Check if the revlog is too big for inline and convert if so.
2189
2190 This should be called after revisions are added to the revlog. If the
2191 revlog has grown too large to be an inline revlog, it will convert it
2192 to use multiple index and data files.
2193 """
2194 tiprev = len(self) - 1
2195 total_size = self.start(tiprev) + self.length(tiprev)
2196 if not self._inline or total_size < _maxinline:
2197 return
2198
2199 troffset = tr.findoffset(self._indexfile)
2200 if troffset is None:
2201 raise error.RevlogError(
2202 _(b"%s not found in the transaction") % self._indexfile
2203 )
2204 trindex = 0
2205 tr.add(self._datafile, 0)
2206
88
89 newrl.addrawrevision(
90 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
2207 existing_handles = False
2208 if self._writinghandles is not None:
2209 existing_handles = True
2210 fp = self._writinghandles[0]
2211 fp.flush()
2212 fp.close()
2213 # We can't use the cached file handle after close(). So prevent
2214 # its usage.
2215 self._writinghandles = None
2216
2217 new_dfh = self._datafp(b'w+')
2218 new_dfh.truncate(0) # drop any potentially existing data
2219 try:
2220 with self._indexfp() as read_ifh:
2221 for r in self:
2222 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2223 if troffset <= self.start(r) + r * self.index.entry_size:
2224 trindex = r
2225 new_dfh.flush()
2226
2227 with self.__index_new_fp() as fp:
2228 self._format_flags &= ~FLAG_INLINE_DATA
2229 self._inline = False
2230 for i in self:
2231 e = self.index.entry_binary(i)
2232 if i == 0 and self._docket is None:
2233 header = self._format_flags | self._format_version
2234 header = self.index.pack_header(header)
2235 e = header + e
2236 fp.write(e)
2237 if self._docket is not None:
2238 self._docket.index_end = fp.tell()
2239
2240 # There is a small transactional race here. If the rename of
2241 # the index fails, we should remove the datafile. It is more
2242 # important to ensure that the data file is not truncated
2243 # when the index is replaced as otherwise data is lost.
2244 tr.replace(self._datafile, self.start(trindex))
2245
2246 # the temp file replaces the real index when we exit the context
2247 # manager
2248
2249 tr.replace(self._indexfile, trindex * self.index.entry_size)
2250 nodemaputil.setup_persistent_nodemap(tr, self)
2251 self._chunkclear()
2252
2253 if existing_handles:
2254 # switched from inline to conventional reopen the index
2255 ifh = self.__index_write_fp()
2256 self._writinghandles = (ifh, new_dfh, None)
2257 new_dfh = None
2258 finally:
2259 if new_dfh is not None:
2260 new_dfh.close()
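# --- illustrative sketch, not part of revlog.py -----------------------
# The trigger condition above, in isolation: an inline revlog is split
# into separate index/data files once the data it stores crosses
# _maxinline bytes (the limit value below is an assumption for the
# example).
def toy_should_split(inline, start_of_tip, length_of_tip, maxinline=131072):
    return inline and start_of_tip + length_of_tip >= maxinline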
2261
2262 def _nodeduplicatecallback(self, transaction, node):
2263 """called when trying to add a node already stored."""
2264
2265 @contextlib.contextmanager
2266 def _writing(self, transaction):
2267 if self._trypending:
2268 msg = b'try to write in a `trypending` revlog: %s'
2269 msg %= self.display_id
2270 raise error.ProgrammingError(msg)
2271 if self._writinghandles is not None:
2272 yield
2273 else:
2274 ifh = dfh = sdfh = None
2275 try:
2276 r = len(self)
2277 # opening the data file.
2278 dsize = 0
2279 if r:
2280 dsize = self.end(r - 1)
2281 dfh = None
2282 if not self._inline:
2283 try:
2284 dfh = self._datafp(b"r+")
2285 if self._docket is None:
2286 dfh.seek(0, os.SEEK_END)
2287 else:
2288 dfh.seek(self._docket.data_end, os.SEEK_SET)
2289 except IOError as inst:
2290 if inst.errno != errno.ENOENT:
2291 raise
2292 dfh = self._datafp(b"w+")
2293 transaction.add(self._datafile, dsize)
2294 if self._sidedatafile is not None:
2295 try:
2296 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2297 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2298 except IOError as inst:
2299 if inst.errno != errno.ENOENT:
2300 raise
2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2302 transaction.add(
2303 self._sidedatafile, self._docket.sidedata_end
2304 )
2305
2306 # opening the index file.
2307 isize = r * self.index.entry_size
2308 ifh = self.__index_write_fp()
2309 if self._inline:
2310 transaction.add(self._indexfile, dsize + isize)
2311 else:
2312 transaction.add(self._indexfile, isize)
2313 # exposing all file handle for writing.
2314 self._writinghandles = (ifh, dfh, sdfh)
2315 yield
2316 if self._docket is not None:
2317 self._write_docket(transaction)
2318 finally:
2319 self._writinghandles = None
2320 if dfh is not None:
2321 dfh.close()
2322 if sdfh is not None:
2323 sdfh.close()
2324 # closing the index file last to avoid exposing references to
2325 # potentially unflushed data content.
2326 if ifh is not None:
2327 ifh.close()
2328
2329 def _write_docket(self, transaction):
2330 """write the current docket on disk
2331
2332 Exists as a method to help the changelog implement transaction logic
2333
2334 We could also imagine using the same transaction logic for all revlogs
2335 since dockets are cheap."""
2336 self._docket.write(transaction)
2337
2338 def addrevision(
2339 self,
2340 text,
2341 transaction,
2342 link,
2343 p1,
2344 p2,
2345 cachedelta=None,
2346 node=None,
2347 flags=REVIDX_DEFAULT_FLAGS,
2348 deltacomputer=None,
2349 sidedata=None,
2350 ):
2351 """add a revision to the log
2352
2353 text - the revision data to add
2354 transaction - the transaction object used for rollback
2355 link - the linkrev data to add
2356 p1, p2 - the parent nodeids of the revision
2357 cachedelta - an optional precomputed delta
2358 node - nodeid of revision; typically node is not specified, and it is
2359 computed by default as hash(text, p1, p2), however subclasses might
2360 use different hashing method (and override checkhash() in such case)
2361 flags - the known flags to set on the revision
2362 deltacomputer - an optional deltacomputer instance shared between
2363 multiple calls
2364 """
2365 if link == nullrev:
2366 raise error.RevlogError(
2367 _(b"attempted to add linkrev -1 to %s") % self.display_id
2368 )
2369
2370 if sidedata is None:
2371 sidedata = {}
2372 elif sidedata and not self.hassidedata:
2373 raise error.ProgrammingError(
2374 _(b"trying to add sidedata to a revlog who don't support them")
2375 )
2376
2377 if flags:
2378 node = node or self.hash(text, p1, p2)
2379
2380 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2381
2382 # If the flag processor modifies the revision data, ignore any provided
2383 # cachedelta.
2384 if rawtext != text:
2385 cachedelta = None
2386
2387 if len(rawtext) > _maxentrysize:
2388 raise error.RevlogError(
2389 _(
2390 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2391 )
2392 % (self.display_id, len(rawtext))
2393 )
2394
2395 node = node or self.hash(rawtext, p1, p2)
2396 rev = self.index.get_rev(node)
2397 if rev is not None:
2398 return rev
2399
2400 if validatehash:
2401 self.checkhash(rawtext, node, p1=p1, p2=p2)
2402
2403 return self.addrawrevision(
2404 rawtext,
2405 transaction,
2406 link,
2407 p1,
2408 p2,
2409 node,
2410 flags,
2411 cachedelta=cachedelta,
2412 deltacomputer=deltacomputer,
2413 sidedata=sidedata,
2414 )
91 )
92
93 tr.addbackup(rl._indexfile, location=b'store')
94 if not rl._inline:
95 tr.addbackup(rl._datafile, location=b'store')
2415
2416 def addrawrevision(
2417 self,
2418 rawtext,
2419 transaction,
2420 link,
2421 p1,
2422 p2,
2423 node,
2424 flags,
2425 cachedelta=None,
2426 deltacomputer=None,
2427 sidedata=None,
2428 ):
2429 """add a raw revision with known flags, node and parents
2430 useful when reusing a revision not stored in this revlog (ex: received
2431 over wire, or read from an external bundle).
2432 """
2433 with self._writing(transaction):
2434 return self._addrevision(
2435 node,
2436 rawtext,
2437 transaction,
2438 link,
2439 p1,
2440 p2,
2441 flags,
2442 cachedelta,
2443 deltacomputer=deltacomputer,
2444 sidedata=sidedata,
2445 )
2446
2447 def compress(self, data):
2448 """Generate a possibly-compressed representation of data."""
2449 if not data:
2450 return b'', data
2451
2452 compressed = self._compressor.compress(data)
2453
2454 if compressed:
2455 # The revlog compressor added the header in the returned data.
2456 return b'', compressed
2457
2458 if data[0:1] == b'\0':
2459 return b'', data
2460 return b'u', data
2461
2462 def decompress(self, data):
2463 """Decompress a revlog chunk.
2464
2465 The chunk is expected to begin with a header identifying the
2466 format type so it can be routed to an appropriate decompressor.
2467 """
2468 if not data:
2469 return data
2470
2471 # Revlogs are read much more frequently than they are written and many
2472 # chunks only take microseconds to decompress, so performance is
2473 # important here.
2474 #
2475 # We can make a few assumptions about revlogs:
2476 #
2477 # 1) the majority of chunks will be compressed (as opposed to inline
2478 # raw data).
2479 # 2) decompressing *any* data will likely be at least 10x slower than
2480 # returning raw inline data.
2481 # 3) we want to prioritize common and officially supported compression
2482 # engines
2483 #
2484 # It follows that we want to optimize for "decompress compressed data
2485 # when encoded with common and officially supported compression engines"
2486 # case over "raw data" and "data encoded by less common or non-official
2487 # compression engines." That is why we have the inline lookup first
2488 # followed by the compengines lookup.
2489 #
2490 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2491 # compressed chunks. And this matters for changelog and manifest reads.
2492 t = data[0:1]
2493
2494 if t == b'x':
2495 try:
2496 return _zlibdecompress(data)
2497 except zlib.error as e:
2498 raise error.RevlogError(
2499 _(b'revlog decompress error: %s')
2500 % stringutil.forcebytestr(e)
2501 )
2502 # '\0' is more common than 'u' so it goes first.
2503 elif t == b'\0':
2504 return data
2505 elif t == b'u':
2506 return util.buffer(data, 1)
2507
2508 compressor = self._get_decompressor(t)
2509
2510 return compressor.decompress(data)
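# --- illustrative sketch, not part of revlog.py -----------------------
# The one-byte header dispatch above, restricted to the three inline
# cases ('x' = zlib, '\0' = stored raw, 'u' = explicit uncompressed);
# pluggable compression engines are left out.
import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)  # zlib streams begin with 0x78 ('x')
    if t == b'\0':
        return data                   # raw chunk that starts with NUL
    if t == b'u':
        return data[1:]               # 'u' marks stored-uncompressed data
    raise ValueError('unknown chunk header %r' % t)

assert toy_decompress(zlib.compress(b'data')) == b'data'
assert toy_decompress(b'udata') == b'data'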
2511
2512 def _addrevision(
2513 self,
2514 node,
2515 rawtext,
2516 transaction,
2517 link,
2518 p1,
2519 p2,
2520 flags,
2521 cachedelta,
2522 alwayscache=False,
2523 deltacomputer=None,
2524 sidedata=None,
2525 ):
2526 """internal function to add revisions to the log
2527
2528 see addrevision for argument descriptions.
2529
2530 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2531
2532 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2533 be used.
2534
2535 invariants:
2536 - rawtext is optional (can be None); if not set, cachedelta must be set.
2537 if both are set, they must correspond to each other.
2538 """
2539 if node == self.nullid:
2540 raise error.RevlogError(
2541 _(b"%s: attempt to add null revision") % self.display_id
2542 )
2543 if (
2544 node == self.nodeconstants.wdirid
2545 or node in self.nodeconstants.wdirfilenodeids
2546 ):
2547 raise error.RevlogError(
2548 _(b"%s: attempt to add wdir revision") % self.display_id
2549 )
2550 if self._writinghandles is None:
2551 msg = b'adding revision outside `revlog._writing` context'
2552 raise error.ProgrammingError(msg)
2553
2554 if self._inline:
2555 fh = self._writinghandles[0]
2556 else:
2557 fh = self._writinghandles[1]
2558
2559 btext = [rawtext]
2560
2561 curr = len(self)
2562 prev = curr - 1
2563
2564 offset = self._get_data_offset(prev)
2565
2566 if self._concurrencychecker:
2567 ifh, dfh, sdfh = self._writinghandles
2568 # XXX no checking for the sidedata file
2569 if self._inline:
2570 # offset is "as if" it were in the .d file, so we need to add on
2571 # the size of the entry metadata.
2572 self._concurrencychecker(
2573 ifh, self._indexfile, offset + curr * self.index.entry_size
2574 )
2575 else:
2576 # Entries in the .i are a consistent size.
2577 self._concurrencychecker(
2578 ifh, self._indexfile, curr * self.index.entry_size
2579 )
2580 self._concurrencychecker(dfh, self._datafile, offset)
2581
2582 p1r, p2r = self.rev(p1), self.rev(p2)
2583
2584 # full versions are inserted when the needed deltas
2585 # become comparable to the uncompressed text
2586 if rawtext is None:
2587 # need rawtext size, before changed by flag processors, which is
2588 # the non-raw size. use revlog explicitly to avoid filelog's extra
2589 # logic that might remove metadata size.
2590 textlen = mdiff.patchedsize(
2591 revlog.size(self, cachedelta[0]), cachedelta[1]
2592 )
2593 else:
2594 textlen = len(rawtext)
2595
2596 if deltacomputer is None:
2597 deltacomputer = deltautil.deltacomputer(self)
2598
2599 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2600
2601 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2602
2603 compression_mode = COMP_MODE_INLINE
2604 if self._docket is not None:
2605 h, d = deltainfo.data
2606 if not h and not d:
2607 # not data to store at all... declare them uncompressed
2608 compression_mode = COMP_MODE_PLAIN
2609 elif not h:
2610 t = d[0:1]
2611 if t == b'\0':
2612 compression_mode = COMP_MODE_PLAIN
2613 elif t == self._docket.default_compression_header:
2614 compression_mode = COMP_MODE_DEFAULT
2615 elif h == b'u':
2616 # we have a more efficient way to declare uncompressed
2617 h = b''
2618 compression_mode = COMP_MODE_PLAIN
2619 deltainfo = deltautil.drop_u_compression(deltainfo)
2620
2621 sidedata_compression_mode = COMP_MODE_INLINE
2622 if sidedata and self.hassidedata:
2623 sidedata_compression_mode = COMP_MODE_PLAIN
2624 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2625 sidedata_offset = self._docket.sidedata_end
2626 h, comp_sidedata = self.compress(serialized_sidedata)
2627 if (
2628 h != b'u'
2629 and comp_sidedata[0:1] != b'\0'
2630 and len(comp_sidedata) < len(serialized_sidedata)
2631 ):
2632 assert not h
2633 if (
2634 comp_sidedata[0:1]
2635 == self._docket.default_compression_header
2636 ):
2637 sidedata_compression_mode = COMP_MODE_DEFAULT
2638 serialized_sidedata = comp_sidedata
2639 else:
2640 sidedata_compression_mode = COMP_MODE_INLINE
2641 serialized_sidedata = comp_sidedata
2642 else:
2643 serialized_sidedata = b""
2644 # Don't store the offset if the sidedata is empty, that way
2645 # we can easily detect empty sidedata and they will be no different
2646 # than ones we manually add.
2647 sidedata_offset = 0
2648
2649 e = (
2650 offset_type(offset, flags),
2651 deltainfo.deltalen,
2652 textlen,
2653 deltainfo.base,
2654 link,
2655 p1r,
2656 p2r,
2657 node,
2658 sidedata_offset,
2659 len(serialized_sidedata),
2660 compression_mode,
2661 sidedata_compression_mode,
2662 )
2663
2664 self.index.append(e)
2665 entry = self.index.entry_binary(curr)
2666 if curr == 0 and self._docket is None:
2667 header = self._format_flags | self._format_version
2668 header = self.index.pack_header(header)
2669 entry = header + entry
2670 self._writeentry(
2671 transaction,
2672 entry,
2673 deltainfo.data,
2674 link,
2675 offset,
2676 serialized_sidedata,
2677 sidedata_offset,
2678 )
2679
2680 rawtext = btext[0]
2681
2682 if alwayscache and rawtext is None:
2683 rawtext = deltacomputer.buildtext(revinfo, fh)
2684
2685 if type(rawtext) == bytes: # only accept immutable objects
2686 self._revisioncache = (node, curr, rawtext)
2687 self._chainbasecache[curr] = deltainfo.chainbase
2688 return curr
2689
2690 def _get_data_offset(self, prev):
2691 """Returns the current offset in the (in-transaction) data file.
2692 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2693 file to store that information: since sidedata can be rewritten to the
2694 end of the data file within a transaction, you can have cases where, for
2695 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2696 to `n - 1`'s sidedata being written after `n`'s data.
2697
2698 TODO cache this in a docket file before getting out of experimental."""
2699 if self._docket is None:
2700 return self.end(prev)
2701 else:
2702 return self._docket.data_end
2703
2704 def _writeentry(
2705 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2706 ):
2707 # Files opened in a+ mode have inconsistent behavior on various
2708 # platforms. Windows requires that a file positioning call be made
2709 # when the file handle transitions between reads and writes. See
2710 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2711 # platforms, Python or the platform itself can be buggy. Some versions
2712 # of Solaris have been observed to not append at the end of the file
2713 # if the file was seeked to before the end. See issue4943 for more.
2714 #
2715 # We work around this issue by inserting a seek() before writing.
2716 # Note: This is likely not necessary on Python 3. However, because
2717 # the file handle is reused for reads and may be seeked there, we need
2718 # to be careful before changing this.
2719 if self._writinghandles is None:
2720 msg = b'adding revision outside `revlog._writing` context'
2721 raise error.ProgrammingError(msg)
2722 ifh, dfh, sdfh = self._writinghandles
2723 if self._docket is None:
2724 ifh.seek(0, os.SEEK_END)
2725 else:
2726 ifh.seek(self._docket.index_end, os.SEEK_SET)
2727 if dfh:
2728 if self._docket is None:
2729 dfh.seek(0, os.SEEK_END)
2730 else:
2731 dfh.seek(self._docket.data_end, os.SEEK_SET)
2732 if sdfh:
2733 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2734
2735 curr = len(self) - 1
2736 if not self._inline:
2737 transaction.add(self._datafile, offset)
2738 if self._sidedatafile:
2739 transaction.add(self._sidedatafile, sidedata_offset)
2740 transaction.add(self._indexfile, curr * len(entry))
2741 if data[0]:
2742 dfh.write(data[0])
2743 dfh.write(data[1])
2744 if sidedata:
2745 sdfh.write(sidedata)
2746 ifh.write(entry)
2747 else:
2748 offset += curr * self.index.entry_size
2749 transaction.add(self._indexfile, offset)
2750 ifh.write(entry)
2751 ifh.write(data[0])
2752 ifh.write(data[1])
2753 assert not sidedata
2754 self._enforceinlinesize(transaction)
2755 if self._docket is not None:
2756 self._docket.index_end = self._writinghandles[0].tell()
2757 self._docket.data_end = self._writinghandles[1].tell()
2758 self._docket.sidedata_end = self._writinghandles[2].tell()
2759
2760 nodemaputil.setup_persistent_nodemap(transaction, self)
2761
2762 def addgroup(
2763 self,
2764 deltas,
2765 linkmapper,
2766 transaction,
2767 alwayscache=False,
2768 addrevisioncb=None,
2769 duplicaterevisioncb=None,
2770 ):
2771 """
2772 add a delta group
2773
2774 given a set of deltas, add them to the revision log. the
2775 first delta is against its parent, which should be in our
2776 log, the rest are against the previous delta.
2777
2778 If ``addrevisioncb`` is defined, it will be called with arguments of
2779 this revlog and the node that was added.
2780 """
2781
2782 if self._adding_group:
2783 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2784
2785 self._adding_group = True
2786 empty = True
2787 try:
2788 with self._writing(transaction):
2789 deltacomputer = deltautil.deltacomputer(self)
2790 # loop through our set of deltas
2791 for data in deltas:
2792 (
2793 node,
2794 p1,
2795 p2,
2796 linknode,
2797 deltabase,
2798 delta,
2799 flags,
2800 sidedata,
2801 ) = data
2802 link = linkmapper(linknode)
2803 flags = flags or REVIDX_DEFAULT_FLAGS
2804
2805 rev = self.index.get_rev(node)
2806 if rev is not None:
2807 # this can happen if two branches make the same change
2808 self._nodeduplicatecallback(transaction, rev)
2809 if duplicaterevisioncb:
2810 duplicaterevisioncb(self, rev)
2811 empty = False
2812 continue
2813
2814 for p in (p1, p2):
2815 if not self.index.has_node(p):
2816 raise error.LookupError(
2817 p, self.radix, _(b'unknown parent')
2818 )
2819
2820 if not self.index.has_node(deltabase):
2821 raise error.LookupError(
2822 deltabase, self.display_id, _(b'unknown delta base')
2823 )
2824
2825 baserev = self.rev(deltabase)
2826
2827 if baserev != nullrev and self.iscensored(baserev):
2828 # if base is censored, delta must be full replacement in a
2829 # single patch operation
2830 hlen = struct.calcsize(b">lll")
2831 oldlen = self.rawsize(baserev)
2832 newlen = len(delta) - hlen
2833 if delta[:hlen] != mdiff.replacediffheader(
2834 oldlen, newlen
2835 ):
2836 raise error.CensoredBaseError(
2837 self.display_id, self.node(baserev)
2838 )
2839
2840 if not flags and self._peek_iscensored(baserev, delta):
2841 flags |= REVIDX_ISCENSORED
2842
2843 # We assume consumers of addrevisioncb will want to retrieve
2844 # the added revision, which will require a call to
2845 # revision(). revision() will fast path if there is a cache
2846 # hit. So, we tell _addrevision() to always cache in this case.
2847 # We're only using addgroup() in the context of changegroup
2848 # generation so the revision data can always be handled as raw
2849 # by the flagprocessor.
2850 rev = self._addrevision(
2851 node,
2852 None,
2853 transaction,
2854 link,
2855 p1,
2856 p2,
2857 flags,
2858 (baserev, delta),
2859 alwayscache=alwayscache,
2860 deltacomputer=deltacomputer,
2861 sidedata=sidedata,
2862 )
2863
2864 if addrevisioncb:
2865 addrevisioncb(self, rev)
2866 empty = False
2867 finally:
2868 self._adding_group = False
2869 return not empty
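# --- illustrative sketch, not part of revlog.py -----------------------
# The censored-base check above, in isolation: a revlog delta is a
# sequence of ">lll" (start, end, newlength) headers, each followed by
# replacement bytes, so a delta over a censored base must be a single
# op replacing the whole old text (mdiff.replacediffheader semantics).
import struct

def toy_is_full_replacement(delta, oldlen):
    hlen = struct.calcsize(b">lll")
    if len(delta) < hlen:
        return False
    start, end, newlen = struct.unpack(b">lll", delta[:hlen])
    return (start, end, newlen) == (0, oldlen, len(delta) - hlen)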
2870
2871 def iscensored(self, rev):
2872 """Check if a file revision is censored."""
2873 if not self._censorable:
2874 return False
2875
2876 return self.flags(rev) & REVIDX_ISCENSORED
2877
2878 def _peek_iscensored(self, baserev, delta):
2879 """Quickly check if a delta produces a censored revision."""
2880 if not self._censorable:
2881 return False
2882
2883 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2884
2885 def getstrippoint(self, minlink):
2886 """find the minimum rev that must be stripped to strip the linkrev
2887
2888 Returns a tuple containing the minimum rev and a set of all revs that
2889 have linkrevs that will be broken by this strip.
2890 """
2891 return storageutil.resolvestripinfo(
2892 minlink,
2893 len(self) - 1,
2894 self.headrevs(),
2895 self.linkrev,
2896 self.parentrevs,
2897 )
2898
2899 def strip(self, minlink, transaction):
2900 """truncate the revlog on the first revision with a linkrev >= minlink
2901
2902 This function is called when we're stripping revision minlink and
2903 its descendants from the repository.
2904
2905 We have to remove all revisions with linkrev >= minlink, because
2906 the equivalent changelog revisions will be renumbered after the
2907 strip.
2908
2909 So we truncate the revlog on the first of these revisions, and
2910 trust that the caller has saved the revisions that shouldn't be
2911 removed and that it'll re-add them after this truncation.
2912 """
2913 if len(self) == 0:
2914 return
2915
2916 rev, _ = self.getstrippoint(minlink)
2917 if rev == len(self):
2918 return
2919
2920 # first truncate the files on disk
2921 data_end = self.start(rev)
2922 if not self._inline:
2923 transaction.add(self._datafile, data_end)
2924 end = rev * self.index.entry_size
2925 else:
2926 end = data_end + (rev * self.index.entry_size)
2927
2928 if self._sidedatafile:
2929 sidedata_end = self.sidedata_cut_off(rev)
2930 transaction.add(self._sidedatafile, sidedata_end)
2931
2932 transaction.add(self._indexfile, end)
2933 if self._docket is not None:
2934 # XXX we could leverage the docket while stripping. However it is
2935 # not powerful enough at the time of this comment
2936 self._docket.index_end = end
2937 self._docket.data_end = data_end
2938 self._docket.sidedata_end = sidedata_end
2939 self._docket.write(transaction, stripping=True)
2940
2941 # then reset internal state in memory to forget those revisions
2942 self._revisioncache = None
2943 self._chaininfocache = util.lrucachedict(500)
2944 self._chunkclear()
2945
2946 del self.index[rev:-1]
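# --- illustrative sketch, not part of revlog.py -----------------------
# The truncation offsets computed above, in isolation (the 64-byte
# entry size is an assumption for the example, matching the v1 index):
# a split revlog cuts the index at rev * entry_size and the data file
# at start(rev); an inline revlog keeps both in one file.
def toy_truncation_offsets(rev, data_end, inline, entry_size=64):
    if inline:
        return {'index': data_end + rev * entry_size}
    return {'index': rev * entry_size, 'data': data_end}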
2947
2948 def checksize(self):
2949 """Check size of index and data files
2950
2951 return a (dd, di) tuple.
2952 - dd: extra bytes for the "data" file
2953 - di: extra bytes for the "index" file
2954
2955 A healthy revlog will return (0, 0).
2956 """
2957 expected = 0
2958 if len(self):
2959 expected = max(0, self.end(len(self) - 1))
2960
2961 try:
2962 with self._datafp() as f:
2963 f.seek(0, io.SEEK_END)
2964 actual = f.tell()
2965 dd = actual - expected
2966 except IOError as inst:
2967 if inst.errno != errno.ENOENT:
2968 raise
2969 dd = 0
2970
96
97 rl.opener.rename(newrl._indexfile, rl._indexfile)
98 if not rl._inline:
99 rl.opener.rename(newrl._datafile, rl._datafile)
2971 try:
2972 f = self.opener(self._indexfile)
2973 f.seek(0, io.SEEK_END)
2974 actual = f.tell()
2975 f.close()
2976 s = self.index.entry_size
2977 i = max(0, actual // s)
2978 di = actual - (i * s)
2979 if self._inline:
2980 databytes = 0
2981 for r in self:
2982 databytes += max(0, self.length(r))
2983 dd = 0
2984 di = actual - len(self) * s - databytes
2985 except IOError as inst:
2986 if inst.errno != errno.ENOENT:
2987 raise
2988 di = 0
2989
2990 return (dd, di)
2991
2992 def files(self):
2993 res = [self._indexfile]
2994 if not self._inline:
2995 res.append(self._datafile)
2996 return res
2997
2998 def emitrevisions(
2999 self,
3000 nodes,
3001 nodesorder=None,
3002 revisiondata=False,
3003 assumehaveparentrevisions=False,
3004 deltamode=repository.CG_DELTAMODE_STD,
3005 sidedata_helpers=None,
3006 ):
3007 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3008 raise error.ProgrammingError(
3009 b'unhandled value for nodesorder: %s' % nodesorder
3010 )
3011
3012 if nodesorder is None and not self._generaldelta:
3013 nodesorder = b'storage'
3014
3015 if (
3016 not self._storedeltachains
3017 and deltamode != repository.CG_DELTAMODE_PREV
3018 ):
3019 deltamode = repository.CG_DELTAMODE_FULL
3020
3021 return storageutil.emitrevisions(
3022 self,
3023 nodes,
3024 nodesorder,
3025 revlogrevisiondelta,
3026 deltaparentfn=self.deltaparent,
3027 candeltafn=self.candelta,
3028 rawsizefn=self.rawsize,
3029 revdifffn=self.revdiff,
3030 flagsfn=self.flags,
3031 deltamode=deltamode,
3032 revisiondata=revisiondata,
3033 assumehaveparentrevisions=assumehaveparentrevisions,
3034 sidedata_helpers=sidedata_helpers,
3035 )
3036
3037 DELTAREUSEALWAYS = b'always'
3038 DELTAREUSESAMEREVS = b'samerevs'
3039 DELTAREUSENEVER = b'never'
3040
3041 DELTAREUSEFULLADD = b'fulladd'
3042
3043 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3044
3045 def clone(
3046 self,
3047 tr,
3048 destrevlog,
3049 addrevisioncb=None,
3050 deltareuse=DELTAREUSESAMEREVS,
3051 forcedeltabothparents=None,
3052 sidedata_helpers=None,
3053 ):
3054 """Copy this revlog to another, possibly with format changes.
3055
3056 The destination revlog will contain the same revisions and nodes.
3057 However, it may not be bit-for-bit identical due to e.g. delta encoding
3058 differences.
3059
3060 The ``deltareuse`` argument controls how deltas from the existing revlog
3061 are preserved in the destination revlog. The argument can have the
3062 following values:
3063
3064 DELTAREUSEALWAYS
3065 Deltas will always be reused (if possible), even if the destination
3066 revlog would not select the same revisions for the delta. This is the
3067 fastest mode of operation.
3068 DELTAREUSESAMEREVS
3069 Deltas will be reused if the destination revlog would pick the same
3070 revisions for the delta. This mode strikes a balance between speed
3071 and optimization.
3072 DELTAREUSENEVER
3073 Deltas will never be reused. This is the slowest mode of execution.
3074 This mode can be used to recompute deltas (e.g. if the diff/delta
3075 algorithm changes).
3076 DELTAREUSEFULLADD
3077 Revisions will be re-added as if they were new content. This is
3078 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3079 e.g. large file detection and handling.
3080
3081 Delta computation can be slow, so the choice of delta reuse policy can
3082 significantly affect run time.
3083
3084 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3085 two extremes. Deltas will be reused if they are appropriate. But if the
3086 delta could choose a better revision, it will do so. This means if you
3087 are converting a non-generaldelta revlog to a generaldelta revlog,
3088 deltas will be recomputed if the delta's parent isn't a parent of the
3089 revision.
3090
3091 In addition to the delta policy, the ``forcedeltabothparents``
3092 argument controls whether to force computing deltas against both parents
3093 for merges. If unset, the destination revlog's current setting is used.
3094
3095 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3096 `sidedata_helpers`.
3097 """
3098 if deltareuse not in self.DELTAREUSEALL:
3099 raise ValueError(
3100 _(b'value for deltareuse invalid: %s') % deltareuse
3101 )
3102
3103 if len(destrevlog):
3104 raise ValueError(_(b'destination revlog is not empty'))
3105
3106 if getattr(self, 'filteredrevs', None):
3107 raise ValueError(_(b'source revlog has filtered revisions'))
3108 if getattr(destrevlog, 'filteredrevs', None):
3109 raise ValueError(_(b'destination revlog has filtered revisions'))
3110
3111 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3112 # if possible.
3113 oldlazydelta = destrevlog._lazydelta
3114 oldlazydeltabase = destrevlog._lazydeltabase
3115 oldamd = destrevlog._deltabothparents
3116
3117 try:
3118 if deltareuse == self.DELTAREUSEALWAYS:
3119 destrevlog._lazydeltabase = True
3120 destrevlog._lazydelta = True
3121 elif deltareuse == self.DELTAREUSESAMEREVS:
3122 destrevlog._lazydeltabase = False
3123 destrevlog._lazydelta = True
3124 elif deltareuse == self.DELTAREUSENEVER:
3125 destrevlog._lazydeltabase = False
3126 destrevlog._lazydelta = False
3127
3128 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3129
3130 self._clone(
3131 tr,
3132 destrevlog,
3133 addrevisioncb,
3134 deltareuse,
3135 forcedeltabothparents,
3136 sidedata_helpers,
3137 )
3138
3139 finally:
3140 destrevlog._lazydelta = oldlazydelta
3141 destrevlog._lazydeltabase = oldlazydeltabase
3142 destrevlog._deltabothparents = oldamd
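# --- illustrative sketch, not part of revlog.py -----------------------
# How the three simple reuse policies map onto the two laziness
# switches toggled in the try-block above:
def toy_reuse_policy(policy):
    return {
        b'always': dict(lazydeltabase=True, lazydelta=True),
        b'samerevs': dict(lazydeltabase=False, lazydelta=True),
        b'never': dict(lazydeltabase=False, lazydelta=False),
    }[policy]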
3143
3144 def _clone(
3145 self,
3146 tr,
3147 destrevlog,
3148 addrevisioncb,
3149 deltareuse,
3150 forcedeltabothparents,
3151 sidedata_helpers,
3152 ):
3153 """perform the core duty of `revlog.clone` after parameter processing"""
3154 deltacomputer = deltautil.deltacomputer(destrevlog)
3155 index = self.index
3156 for rev in self:
3157 entry = index[rev]
3158
3159 # Some classes override linkrev to take filtered revs into
3160 # account. Use raw entry from index.
3161 flags = entry[0] & 0xFFFF
3162 linkrev = entry[4]
3163 p1 = index[entry[5]][7]
3164 p2 = index[entry[6]][7]
3165 node = entry[7]
3166
3167 # (Possibly) reuse the delta from the revlog if allowed and
3168 # the revlog chunk is a delta.
3169 cachedelta = None
3170 rawtext = None
3171 if deltareuse == self.DELTAREUSEFULLADD:
3172 text = self._revisiondata(rev)
3173 sidedata = self.sidedata(rev)
3174
3175 if sidedata_helpers is not None:
3176 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3177 self, sidedata_helpers, sidedata, rev
3178 )
3179 flags = flags | new_flags[0] & ~new_flags[1]
3180
3181 destrevlog.addrevision(
3182 text,
3183 tr,
3184 linkrev,
3185 p1,
3186 p2,
3187 cachedelta=cachedelta,
3188 node=node,
3189 flags=flags,
3190 deltacomputer=deltacomputer,
3191 sidedata=sidedata,
3192 )
3193 else:
3194 if destrevlog._lazydelta:
3195 dp = self.deltaparent(rev)
3196 if dp != nullrev:
3197 cachedelta = (dp, bytes(self._chunk(rev)))
3198
3199 sidedata = None
3200 if not cachedelta:
3201 rawtext = self._revisiondata(rev)
3202 sidedata = self.sidedata(rev)
3203 if sidedata is None:
3204 sidedata = self.sidedata(rev)
3205
3206 if sidedata_helpers is not None:
3207 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3208 self, sidedata_helpers, sidedata, rev
3209 )
3210 flags = flags | new_flags[0] & ~new_flags[1]
3211
3212 with destrevlog._writing(tr):
3213 destrevlog._addrevision(
3214 node,
3215 rawtext,
3216 tr,
3217 linkrev,
3218 p1,
3219 p2,
3220 flags,
3221 cachedelta,
3222 deltacomputer=deltacomputer,
3223 sidedata=sidedata,
3224 )
3225
3226 if addrevisioncb:
3227 addrevisioncb(self, rev, node)
3228
3229 def censorrevision(self, tr, censornode, tombstone=b''):
3230 if self._format_version == REVLOGV0:
3231 raise error.RevlogError(
3232 _(b'cannot censor with version %d revlogs')
3233 % self._format_version
3234 )
3235
3236 censorrev = self.rev(censornode)
3237 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3238
3239 if len(tombstone) > self.rawsize(censorrev):
3240 raise error.Abort(
3241 _(b'censor tombstone must be no longer than censored data')
3242 )
3243
100
101 rl.clearcaches()
102 rl._loadindex()
3244 # Rewriting the revlog in place is hard. Our strategy for censoring is
3245 # to create a new revlog, copy all revisions to it, then replace the
3246 # revlogs on transaction close.
3247 #
3248 # This is a bit dangerous. We could easily have a mismatch of state.
3249 newrl = revlog(
3250 self.opener,
3251 target=self.target,
3252 radix=self.radix,
3253 postfix=b'tmpcensored',
3254 censorable=True,
3255 )
3256 newrl._format_version = self._format_version
3257 newrl._format_flags = self._format_flags
3258 newrl._generaldelta = self._generaldelta
3259 newrl._parse_index = self._parse_index
3260
3261 for rev in self.revs():
3262 node = self.node(rev)
3263 p1, p2 = self.parents(node)
3264
3265 if rev == censorrev:
3266 newrl.addrawrevision(
3267 tombstone,
3268 tr,
3269 self.linkrev(censorrev),
3270 p1,
3271 p2,
3272 censornode,
3273 REVIDX_ISCENSORED,
3274 )
3275
3276 if newrl.deltaparent(rev) != nullrev:
3277 raise error.Abort(
3278 _(
3279 b'censored revision stored as delta; '
3280 b'cannot censor'
3281 ),
3282 hint=_(
3283 b'censoring of revlogs is not '
3284 b'fully implemented; please report '
3285 b'this bug'
3286 ),
3287 )
3288 continue
3289
3290 if self.iscensored(rev):
3291 if self.deltaparent(rev) != nullrev:
3292 raise error.Abort(
3293 _(
3294 b'cannot censor due to censored '
3295 b'revision having delta stored'
3296 )
3297 )
3298 rawtext = self._chunk(rev)
3299 else:
3300 rawtext = self.rawdata(rev)
3301
3302 newrl.addrawrevision(
3303 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3304 )
3305
3306 tr.addbackup(self._indexfile, location=b'store')
3307 if not self._inline:
3308 tr.addbackup(self._datafile, location=b'store')
3309
3310 self.opener.rename(newrl._indexfile, self._indexfile)
3311 if not self._inline:
3312 self.opener.rename(newrl._datafile, self._datafile)
3313
3314 self.clearcaches()
3315 self._loadindex()
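# --- illustrative sketch, not part of revlog.py -----------------------
# The copy-based strategy above, reduced to its essence over a toy list
# of (node, rawtext) pairs: rebuild everything, swapping in the
# tombstone for the censored node, then replace the old storage.
def toy_censor(revisions, censorednode, tombstone):
    return [
        (node, tombstone if node == censorednode else rawtext)
        for node, rawtext in revisions
    ]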
3316
3317 def verifyintegrity(self, state):
3318 """Verifies the integrity of the revlog.
3319
3320 Yields ``revlogproblem`` instances describing problems that are
3321 found.
3322 """
3323 dd, di = self.checksize()
3324 if dd:
3325 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3326 if di:
3327 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3328
3329 version = self._format_version
3330
3331 # The verifier tells us what version revlog we should be.
3332 if version != state[b'expectedversion']:
3333 yield revlogproblem(
3334 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3335 % (self.display_id, version, state[b'expectedversion'])
3336 )
3337
3338 state[b'skipread'] = set()
3339 state[b'safe_renamed'] = set()
3340
3341 for rev in self:
3342 node = self.node(rev)
3343
3344 # Verify contents. 4 cases to care about:
3345 #
3346 # common: the most common case
3347 # rename: with a rename
3348 # meta: file content starts with b'\1\n', the metadata
3349 # header defined in filelog.py, but without a rename
3350 # ext: content stored externally
3351 #
3352 # More formally, their differences are shown below:
3353 #
3354 # | common | rename | meta | ext
3355 # -------------------------------------------------------
3356 # flags() | 0 | 0 | 0 | not 0
3357 # renamed() | False | True | False | ?
3358 # rawtext[0:2]=='\1\n'| False | True | True | ?
3359 #
3360 # "rawtext" means the raw text stored in revlog data, which
3361 # could be retrieved by "rawdata(rev)". "text"
3362 # mentioned below is "revision(rev)".
3363 #
3364 # There are 3 different lengths stored physically:
3365 # 1. L1: rawsize, stored in revlog index
3366 # 2. L2: len(rawtext), stored in revlog data
3367 # 3. L3: len(text), stored in revlog data if flags==0, or
3368 # possibly somewhere else if flags!=0
3369 #
3370 # L1 should be equal to L2. L3 could be different from them.
3371 # "text" may or may not affect commit hash depending on flag
3372 # processors (see flagutil.addflagprocessor).
3373 #
3374 # | common | rename | meta | ext
3375 # -------------------------------------------------
3376 # rawsize() | L1 | L1 | L1 | L1
3377 # size() | L1 | L2-LM | L1(*) | L1 (?)
3378 # len(rawtext) | L2 | L2 | L2 | L2
3379 # len(text) | L2 | L2 | L2 | L3
3380 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3381 #
3382 # LM: length of metadata, depending on rawtext
3383 # (*): not ideal, see comment in filelog.size
3384 # (?): could be "- len(meta)" if the resolved content has
3385 # rename metadata
3386 #
3387 # Checks needed to be done:
3388 # 1. length check: L1 == L2, in all cases.
3389 # 2. hash check: depending on flag processor, we may need to
3390 # use either "text" (external), or "rawtext" (in revlog).
3391
3392 try:
3393 skipflags = state.get(b'skipflags', 0)
3394 if skipflags:
3395 skipflags &= self.flags(rev)
3396
3397 _verify_revision(self, skipflags, state, node)
3398
3399 l1 = self.rawsize(rev)
3400 l2 = len(self.rawdata(node))
3401
3402 if l1 != l2:
3403 yield revlogproblem(
3404 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3405 node=node,
3406 )
3407
3408 except error.CensoredNodeError:
3409 if state[b'erroroncensored']:
3410 yield revlogproblem(
3411 error=_(b'censored file data'), node=node
3412 )
3413 state[b'skipread'].add(node)
3414 except Exception as e:
3415 yield revlogproblem(
3416 error=_(b'unpacking %s: %s')
3417 % (short(node), stringutil.forcebytestr(e)),
3418 node=node,
3419 )
3420 state[b'skipread'].add(node)
3421
3422 def storageinfo(
3423 self,
3424 exclusivefiles=False,
3425 sharedfiles=False,
3426 revisionscount=False,
3427 trackedsize=False,
3428 storedsize=False,
3429 ):
3430 d = {}
3431
3432 if exclusivefiles:
3433 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3434 if not self._inline:
3435 d[b'exclusivefiles'].append((self.opener, self._datafile))
3436
3437 if sharedfiles:
3438 d[b'sharedfiles'] = []
3439
3440 if revisionscount:
3441 d[b'revisionscount'] = len(self)
3442
3443 if trackedsize:
3444 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3445
3446 if storedsize:
3447 d[b'storedsize'] = sum(
3448 self.opener.stat(path).st_size for path in self.files()
3449 )
3450
3451 return d
3452
3453 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3454 if not self.hassidedata:
3455 return
3456 # revlog formats with sidedata support do not support inline
3457 assert not self._inline
3458 if not helpers[1] and not helpers[2]:
3459 # Nothing to generate or remove
3460 return
3461
3462 new_entries = []
3463 # append the new sidedata
3464 with self._writing(transaction):
3465 ifh, dfh, sdfh = self._writinghandles
3466 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3467
3468 current_offset = sdfh.tell()
3469 for rev in range(startrev, endrev + 1):
3470 entry = self.index[rev]
3471 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3472 store=self,
3473 sidedata_helpers=helpers,
3474 sidedata={},
3475 rev=rev,
3476 )
3477
3478 serialized_sidedata = sidedatautil.serialize_sidedata(
3479 new_sidedata
3480 )
3481
3482 sidedata_compression_mode = COMP_MODE_INLINE
3483 if serialized_sidedata and self.hassidedata:
3484 sidedata_compression_mode = COMP_MODE_PLAIN
3485 h, comp_sidedata = self.compress(serialized_sidedata)
3486 if (
3487 h != b'u'
3488 and comp_sidedata[0] != b'\0'
3489 and len(comp_sidedata) < len(serialized_sidedata)
3490 ):
3491 assert not h
3492 if (
3493 comp_sidedata[0]
3494 == self._docket.default_compression_header
3495 ):
3496 sidedata_compression_mode = COMP_MODE_DEFAULT
3497 serialized_sidedata = comp_sidedata
3498 else:
3499 sidedata_compression_mode = COMP_MODE_INLINE
3500 serialized_sidedata = comp_sidedata
3501 if entry[8] != 0 or entry[9] != 0:
3502 # rewriting entries that already have sidedata is not
3503 # supported yet, because it introduces garbage data in the
3504 # revlog.
3505 msg = b"rewriting existing sidedata is not supported yet"
3506 raise error.Abort(msg)
3507
3508 # Apply (potential) flags to add and to remove after running
3509 # the sidedata helpers
3510 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3511 entry_update = (
3512 current_offset,
3513 len(serialized_sidedata),
3514 new_offset_flags,
3515 sidedata_compression_mode,
3516 )
3517
3518 # the sidedata computation might have moved the file cursors around
3519 sdfh.seek(current_offset, os.SEEK_SET)
3520 sdfh.write(serialized_sidedata)
3521 new_entries.append(entry_update)
3522 current_offset += len(serialized_sidedata)
3523 self._docket.sidedata_end = sdfh.tell()
3524
3525 # rewrite the new index entries
3526 ifh.seek(startrev * self.index.entry_size)
3527 for i, e in enumerate(new_entries):
3528 rev = startrev + i
3529 self.index.replace_sidedata_info(rev, *e)
3530 packed = self.index.entry_binary(rev)
3531 if rev == 0 and self._docket is None:
3532 header = self._format_flags | self._format_version
3533 header = self.index.pack_header(header)
3534 packed = header + packed
3535 ifh.write(packed)