upstream/mercurial-mirror Commit - r48132:24ea3ef3

1

# storageutil.py - Storage functionality agnostic of backend implementation.

1

# storageutil.py - Storage functionality agnostic of backend implementation.

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import re

10

import re

11

import struct

11

import struct

12

13

from ..i18n import _

13

from ..i18n import _

14

from ..node import (

14

from ..node import (

15

bin,

15

bin,

16

nullrev,

16

nullrev,

17

sha1nodeconstants,

17

sha1nodeconstants,

18

)

18

)

19

from .. import (

19

from .. import (

20

dagop,

20

dagop,

21

error,

21

error,

22

mdiff,

22

mdiff,

23

pycompat,

23

pycompat,

24

)

24

)

25

from ..interfaces import repository

25

from ..interfaces import repository

26

from ..revlogutils import sidedata as sidedatamod

26

from ..revlogutils import sidedata as sidedatamod

27

from ..utils import hashutil

27

from ..utils import hashutil

28

29

_nullhash = hashutil.sha1(sha1nodeconstants.nullid)

29

_nullhash = hashutil.sha1(sha1nodeconstants.nullid)

30

31

# revision data contains extra metadata not part of the official digest

31

# revision data contains extra metadata not part of the official digest

32

# Only used in changegroup >= v4.

32

# Only used in changegroup >= v4.

33

CG_FLAG_SIDEDATA = 1

33

CG_FLAG_SIDEDATA = 1

34

35

36

def hashrevisionsha1(text, p1, p2):

36

def hashrevisionsha1(text, p1, p2):

37

"""Compute the SHA-1 for revision data and its parents.

37

"""Compute the SHA-1 for revision data and its parents.

38

39

This hash combines both the current file contents and its history

39

This hash combines both the current file contents and its history

40

in a manner that makes it easy to distinguish nodes with the same

40

in a manner that makes it easy to distinguish nodes with the same

41

content in the revision graph.

41

content in the revision graph.

42

"""

42

"""

43

# As of now, if one of the parent node is null, p2 is null

43

# As of now, if one of the parent node is null, p2 is null

44

if p2 == sha1nodeconstants.nullid:

44

if p2 == sha1nodeconstants.nullid:

45

# deep copy of a hash is faster than creating one

45

# deep copy of a hash is faster than creating one

46

s = _nullhash.copy()

46

s = _nullhash.copy()

47

s.update(p1)

47

s.update(p1)

48

else:

48

else:

49

# none of the parent nodes are nullid

49

# none of the parent nodes are nullid

50

if p1 < p2:

50

if p1 < p2:

51

a = p1

51

a = p1

52

b = p2

52

b = p2

53

else:

53

else:

54

a = p2

54

a = p2

55

b = p1

55

b = p1

56

s = hashutil.sha1(a)

56

s = hashutil.sha1(a)

57

s.update(b)

57

s.update(b)

58

s.update(text)

58

s.update(text)

59

return s.digest()

59

return s.digest()

60

61

62

METADATA_RE = re.compile(b'\x01\n')

62

METADATA_RE = re.compile(b'\x01\n')

63

64

65

def parsemeta(text):

65

def parsemeta(text):

66

"""Parse metadata header from revision data.

66

"""Parse metadata header from revision data.

67

68

Returns a 2-tuple of (metadata, offset), where both can be None if there

68

Returns a 2-tuple of (metadata, offset), where both can be None if there

69

is no metadata.

69

is no metadata.

70

"""

70

"""

71

# text can be buffer, so we can't use .startswith or .index

71

# text can be buffer, so we can't use .startswith or .index

72

if text[:2] != b'\x01\n':

72

if text[:2] != b'\x01\n':

73

return None, None

73

return None, None

74

s = METADATA_RE.search(text, 2).start()

74

s = METADATA_RE.search(text, 2).start()

75

mtext = text[2:s]

75

mtext = text[2:s]

76

meta = {}

76

meta = {}

77

for l in mtext.splitlines():

77

for l in mtext.splitlines():

78

k, v = l.split(b': ', 1)

78

k, v = l.split(b': ', 1)

79

meta[k] = v

79

meta[k] = v

80

return meta, s + 2

80

return meta, s + 2

81

82

83

def packmeta(meta, text):

83

def packmeta(meta, text):

84

"""Add metadata to fulltext to produce revision text."""

84

"""Add metadata to fulltext to produce revision text."""

85

keys = sorted(meta)

85

keys = sorted(meta)

86

metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)

86

metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)

87

return b'\x01\n%s\x01\n%s' % (metatext, text)

87

return b'\x01\n%s\x01\n%s' % (metatext, text)

88

89

90

def iscensoredtext(text):

90

def iscensoredtext(text):

91

meta = parsemeta(text)[0]

91

meta = parsemeta(text)[0]

92

return meta and b'censored' in meta

92

return meta and b'censored' in meta

93

94

95

def filtermetadata(text):

95

def filtermetadata(text):

96

"""Extract just the revision data from source text.

96

"""Extract just the revision data from source text.

97

98

Returns ``text`` unless it has a metadata header, in which case we return

98

Returns ``text`` unless it has a metadata header, in which case we return

99

a new buffer without hte metadata.

99

a new buffer without hte metadata.

100

"""

100

"""

101

if not text.startswith(b'\x01\n'):

101

if not text.startswith(b'\x01\n'):

102

return text

102

return text

103

104

offset = text.index(b'\x01\n', 2)

104

offset = text.index(b'\x01\n', 2)

105

return text[offset + 2 :]

105

return text[offset + 2 :]

106

107

108

def filerevisioncopied(store, node):

108

def filerevisioncopied(store, node):

109

"""Resolve file revision copy metadata.

109

"""Resolve file revision copy metadata.

110

111

Returns ``False`` if the file has no copy metadata. Otherwise a

111

Returns ``False`` if the file has no copy metadata. Otherwise a

112

2-tuple of the source filename and node.

112

2-tuple of the source filename and node.

113

"""

113

"""

114

if store.parents(node)[0] != sha1nodeconstants.nullid:

114

if store.parents(node)[0] != sha1nodeconstants.nullid:

115

return False

115

return False

116

117

meta = parsemeta(store.revision(node))[0]

117

meta = parsemeta(store.revision(node))[0]

118

119

# copy and copyrev occur in pairs. In rare cases due to old bugs,

119

# copy and copyrev occur in pairs. In rare cases due to old bugs,

120

# one can occur without the other. So ensure both are present to flag

120

# one can occur without the other. So ensure both are present to flag

121

# as a copy.

121

# as a copy.

122

if meta and b'copy' in meta and b'copyrev' in meta:

122

if meta and b'copy' in meta and b'copyrev' in meta:

123

return meta[b'copy'], bin(meta[b'copyrev'])

123

return meta[b'copy'], bin(meta[b'copyrev'])

124

125

return False

125

return False

126

127

128

def filedataequivalent(store, node, filedata):

128

def filedataequivalent(store, node, filedata):

129

"""Determines whether file data is equivalent to a stored node.

129

"""Determines whether file data is equivalent to a stored node.

130

131

Returns True if the passed file data would hash to the same value

131

Returns True if the passed file data would hash to the same value

132

as a stored revision and False otherwise.

132

as a stored revision and False otherwise.

133

134

When a stored revision is censored, filedata must be empty to have

134

When a stored revision is censored, filedata must be empty to have

135

equivalence.

135

equivalence.

136

137

When a stored revision has copy metadata, it is ignored as part

137

When a stored revision has copy metadata, it is ignored as part

138

of the compare.

138

of the compare.

139

"""

139

"""

140

141

if filedata.startswith(b'\x01\n'):

141

if filedata.startswith(b'\x01\n'):

142

revisiontext = b'\x01\n\x01\n' + filedata

142

revisiontext = b'\x01\n\x01\n' + filedata

143

else:

143

else:

144

revisiontext = filedata

144

revisiontext = filedata

145

146

p1, p2 = store.parents(node)

146

p1, p2 = store.parents(node)

147

148

computednode = hashrevisionsha1(revisiontext, p1, p2)

148

computednode = hashrevisionsha1(revisiontext, p1, p2)

149

150

if computednode == node:

150

if computednode == node:

151

return True

151

return True

152

153

# Censored files compare against the empty file.

153

# Censored files compare against the empty file.

154

if store.iscensored(store.rev(node)):

154

if store.iscensored(store.rev(node)):

155

return filedata == b''

155

return filedata == b''

156

157

# Renaming a file produces a different hash, even if the data

157

# Renaming a file produces a different hash, even if the data

158

# remains unchanged. Check if that's the case.

158

# remains unchanged. Check if that's the case.

159

if store.renamed(node):

159

if store.renamed(node):

160

return store.read(node) == filedata

160

return store.read(node) == filedata

161

162

return False

162

return False

163

164

165

def iterrevs(storelen, start=0, stop=None):

165

def iterrevs(storelen, start=0, stop=None):

166

"""Iterate over revision numbers in a store."""

166

"""Iterate over revision numbers in a store."""

167

step = 1

167

step = 1

168

169

if stop is not None:

169

if stop is not None:

170

if start > stop:

170

if start > stop:

171

step = -1

171

step = -1

172

stop += step

172

stop += step

173

if stop > storelen:

173

if stop > storelen:

174

stop = storelen

174

stop = storelen

175

else:

175

else:

176

stop = storelen

176

stop = storelen

177

178

return pycompat.xrange(start, stop, step)

178

return pycompat.xrange(start, stop, step)

179

180

181

def fileidlookup(store, fileid, identifier):

181

def fileidlookup(store, fileid, identifier):

182

"""Resolve the file node for a value.

182

"""Resolve the file node for a value.

183

184

``store`` is an object implementing the ``ifileindex`` interface.

184

``store`` is an object implementing the ``ifileindex`` interface.

185

186

``fileid`` can be:

186

``fileid`` can be:

187

188

* A 20 or 32 byte binary node.

188

* A 20 or 32 byte binary node.

189

* An integer revision number

189

* An integer revision number

190

* A 40 or 64 byte hex node.

190

* A 40 or 64 byte hex node.

191

* A bytes that can be parsed as an integer representing a revision number.

191

* A bytes that can be parsed as an integer representing a revision number.

192

193

``identifier`` is used to populate ``error.LookupError`` with an identifier

193

``identifier`` is used to populate ``error.LookupError`` with an identifier

194

for the store.

194

for the store.

195

196

Raises ``error.LookupError`` on failure.

196

Raises ``error.LookupError`` on failure.

197

"""

197

"""

198

if isinstance(fileid, int):

198

if isinstance(fileid, int):

199

try:

199

try:

200

return store.node(fileid)

200

return store.node(fileid)

201

except IndexError:

201

except IndexError:

202

raise error.LookupError(

202

raise error.LookupError(

203

b'%d' % fileid, identifier, _(b'no match found')

203

b'%d' % fileid, identifier, _(b'no match found')

204

)

204

)

205

206

if len(fileid) in (20, 32):

206

if len(fileid) in (20, 32):

207

try:

207

try:

208

store.rev(fileid)

208

store.rev(fileid)

209

return fileid

209

return fileid

210

except error.LookupError:

210

except error.LookupError:

211

pass

211

pass

212

213

if len(fileid) in (40, 64):

213

if len(fileid) in (40, 64):

214

try:

214

try:

215

rawnode = bin(fileid)

215

rawnode = bin(fileid)

216

store.rev(rawnode)

216

store.rev(rawnode)

217

return rawnode

217

return rawnode

218

except TypeError:

218

except TypeError:

219

pass

219

pass

220

221

try:

221

try:

222

rev = int(fileid)

222

rev = int(fileid)

223

224

if b'%d' % rev != fileid:

224

if b'%d' % rev != fileid:

225

raise ValueError

225

raise ValueError

226

227

try:

227

try:

228

return store.node(rev)

228

return store.node(rev)

229

except (IndexError, TypeError):

229

except (IndexError, TypeError):

230

pass

230

pass

231

except (ValueError, OverflowError):

231

except (ValueError, OverflowError):

232

pass

232

pass

233

234

raise error.LookupError(fileid, identifier, _(b'no match found'))

234

raise error.LookupError(fileid, identifier, _(b'no match found'))

235

236

237

def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):

237

def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):

238

"""Resolve information needed to strip revisions.

238

"""Resolve information needed to strip revisions.

239

240

Finds the minimum revision number that must be stripped in order to

240

Finds the minimum revision number that must be stripped in order to

241

strip ``minlinkrev``.

241

strip ``minlinkrev``.

242

243

Returns a 2-tuple of the minimum revision number to do that and a set

243

Returns a 2-tuple of the minimum revision number to do that and a set

244

of all revision numbers that have linkrevs that would be broken

244

of all revision numbers that have linkrevs that would be broken

245

by that strip.

245

by that strip.

246

247

``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.

247

``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.

248

``headrevs`` is an iterable of head revisions.

248

``headrevs`` is an iterable of head revisions.

249

``linkrevfn`` is a callable that receives a revision and returns a linked

249

``linkrevfn`` is a callable that receives a revision and returns a linked

250

revision.

250

revision.

251

``parentrevsfn`` is a callable that receives a revision number and returns

251

``parentrevsfn`` is a callable that receives a revision number and returns

252

an iterable of its parent revision numbers.

252

an iterable of its parent revision numbers.

253

"""

253

"""

254

brokenrevs = set()

254

brokenrevs = set()

255

strippoint = tiprev + 1

255

strippoint = tiprev + 1

256

257

heads = {}

257

heads = {}

258

futurelargelinkrevs = set()

258

futurelargelinkrevs = set()

259

for head in headrevs:

259

for head in headrevs:

260

headlinkrev = linkrevfn(head)

260

headlinkrev = linkrevfn(head)

261

heads[head] = headlinkrev

261

heads[head] = headlinkrev

262

if headlinkrev >= minlinkrev:

262

if headlinkrev >= minlinkrev:

263

futurelargelinkrevs.add(headlinkrev)

263

futurelargelinkrevs.add(headlinkrev)

264

265

# This algorithm involves walking down the rev graph, starting at the

265

# This algorithm involves walking down the rev graph, starting at the

266

# heads. Since the revs are topologically sorted according to linkrev,

266

# heads. Since the revs are topologically sorted according to linkrev,

267

# once all head linkrevs are below the minlink, we know there are

267

# once all head linkrevs are below the minlink, we know there are

268

# no more revs that could have a linkrev greater than minlink.

268

# no more revs that could have a linkrev greater than minlink.

269

# So we can stop walking.

269

# So we can stop walking.

270

while futurelargelinkrevs:

270

while futurelargelinkrevs:

271

strippoint -= 1

271

strippoint -= 1

272

linkrev = heads.pop(strippoint)

272

linkrev = heads.pop(strippoint)

273

274

if linkrev < minlinkrev:

274

if linkrev < minlinkrev:

275

brokenrevs.add(strippoint)

275

brokenrevs.add(strippoint)

276

else:

276

else:

277

futurelargelinkrevs.remove(linkrev)

277

futurelargelinkrevs.remove(linkrev)

278

279

for p in parentrevsfn(strippoint):

279

for p in parentrevsfn(strippoint):

280

if p != nullrev:

280

if p != nullrev:

281

plinkrev = linkrevfn(p)

281

plinkrev = linkrevfn(p)

282

heads[p] = plinkrev

282

heads[p] = plinkrev

283

if plinkrev >= minlinkrev:

283

if plinkrev >= minlinkrev:

284

futurelargelinkrevs.add(plinkrev)

284

futurelargelinkrevs.add(plinkrev)

285

286

return strippoint, brokenrevs

286

return strippoint, brokenrevs

287

288

289

def emitrevisions(

289

def emitrevisions(

290

store,

290

store,

291

nodes,

291

nodes,

292

nodesorder,

292

nodesorder,

293

resultcls,

293

resultcls,

294

deltaparentfn=None,

294

deltaparentfn=None,

295

candeltafn=None,

295

candeltafn=None,

296

rawsizefn=None,

296

rawsizefn=None,

297

revdifffn=None,

297

revdifffn=None,

298

flagsfn=None,

298

flagsfn=None,

299

deltamode=repository.CG_DELTAMODE_STD,

299

deltamode=repository.CG_DELTAMODE_STD,

300

revisiondata=False,

300

revisiondata=False,

301

assumehaveparentrevisions=False,

301

assumehaveparentrevisions=False,

302

sidedata_helpers=None,

302

sidedata_helpers=None,

303

):

303

):

304

"""Generic implementation of ifiledata.emitrevisions().

304

"""Generic implementation of ifiledata.emitrevisions().

305

306

Emitting revision data is subtly complex. This function attempts to

306

Emitting revision data is subtly complex. This function attempts to

307

encapsulate all the logic for doing so in a backend-agnostic way.

307

encapsulate all the logic for doing so in a backend-agnostic way.

308

309

``store``

309

``store``

310

Object conforming to ``ifilestorage`` interface.

310

Object conforming to ``ifilestorage`` interface.

311

312

``nodes``

312

``nodes``

313

List of revision nodes whose data to emit.

313

List of revision nodes whose data to emit.

314

315

``resultcls``

315

``resultcls``

316

A type implementing the ``irevisiondelta`` interface that will be

316

A type implementing the ``irevisiondelta`` interface that will be

317

constructed and returned.

317

constructed and returned.

318

319

``deltaparentfn`` (optional)

319

``deltaparentfn`` (optional)

320

Callable receiving a revision number and returning the revision number

320

Callable receiving a revision number and returning the revision number

321

of a revision that the internal delta is stored against. This delta

321

of a revision that the internal delta is stored against. This delta

322

will be preferred over computing a new arbitrary delta.

322

will be preferred over computing a new arbitrary delta.

323

324

If not defined, a delta will always be computed from raw revision

324

If not defined, a delta will always be computed from raw revision

325

data.

325

data.

326

327

``candeltafn`` (optional)

327

``candeltafn`` (optional)

328

Callable receiving a pair of revision numbers that returns a bool

328

Callable receiving a pair of revision numbers that returns a bool

329

indicating whether a delta between them can be produced.

329

indicating whether a delta between them can be produced.

330

331

If not defined, it is assumed that any two revisions can delta with

331

If not defined, it is assumed that any two revisions can delta with

332

each other.

332

each other.

333

334

``rawsizefn`` (optional)

334

``rawsizefn`` (optional)

335

Callable receiving a revision number and returning the length of the

335

Callable receiving a revision number and returning the length of the

336

``store.rawdata(rev)``.

336

``store.rawdata(rev)``.

337

338

If not defined, ``len(store.rawdata(rev))`` will be called.

338

If not defined, ``len(store.rawdata(rev))`` will be called.

339

340

``revdifffn`` (optional)

340

``revdifffn`` (optional)

341

Callable receiving a pair of revision numbers that returns a delta

341

Callable receiving a pair of revision numbers that returns a delta

342

between them.

342

between them.

343

344

If not defined, a delta will be computed by invoking mdiff code

344

If not defined, a delta will be computed by invoking mdiff code

345

on ``store.revision()`` results.

345

on ``store.revision()`` results.

346

347

Defining this function allows a precomputed or stored delta to be

347

Defining this function allows a precomputed or stored delta to be

348

used without having to compute on.

348

used without having to compute on.

349

350

``flagsfn`` (optional)

350

``flagsfn`` (optional)

351

Callable receiving a revision number and returns the integer flags

351

Callable receiving a revision number and returns the integer flags

352

value for it. If not defined, flags value will be 0.

352

value for it. If not defined, flags value will be 0.

353

354

``deltamode``

354

``deltamode``

355

constaint on delta to be sent:

355

constaint on delta to be sent:

356

* CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,

356

* CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,

357

* CG_DELTAMODE_PREV - only delta against "prev",

357

* CG_DELTAMODE_PREV - only delta against "prev",

358

* CG_DELTAMODE_FULL - only issue full snapshot.

358

* CG_DELTAMODE_FULL - only issue full snapshot.

359

360

Whether to send fulltext revisions instead of deltas, if allowed.

360

Whether to send fulltext revisions instead of deltas, if allowed.

361

362

``nodesorder``

362

``nodesorder``

363

``revisiondata``

363

``revisiondata``

364

``assumehaveparentrevisions``

364

``assumehaveparentrevisions``

365

``sidedata_helpers`` (optional)

365

``sidedata_helpers`` (optional)

366

If not None, means that sidedata should be included.

366

If not None, means that sidedata should be included.

367

See `revlogutil.sidedata.get_sidedata_helpers`.

367

See `revlogutil.sidedata.get_sidedata_helpers`.

368

"""

368

"""

369

370

fnode = store.node

370

fnode = store.node

371

frev = store.rev

371

frev = store.rev

372

373

if nodesorder == b'nodes':

373

if nodesorder == b'nodes':

374

revs = [frev(n) for n in nodes]

374

revs = [frev(n) for n in nodes]

375

elif nodesorder == b'linear':

375

elif nodesorder == b'linear':

376

revs = {frev(n) for n in nodes}

376

revs = {frev(n) for n in nodes}

377

revs = dagop.linearize(revs, store.parentrevs)

377

revs = dagop.linearize(revs, store.parentrevs)

378

else: # storage and default

378

else: # storage and default

379

revs = sorted(frev(n) for n in nodes)

379

revs = sorted(frev(n) for n in nodes)

380

381

prevrev = None

381

prevrev = None

382

383

if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:

383

if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:

384

prevrev = store.parentrevs(revs[0])[0]

384

prevrev = store.parentrevs(revs[0])[0]

385

386

# Set of revs available to delta against.

386

# Set of revs available to delta against.

387

available = set()

387

available = set()

388

389

for rev in revs:

389

for rev in revs:

390

if rev == nullrev:

390

if rev == nullrev:

391

continue

391

continue

392

393

node = fnode(rev)

393

node = fnode(rev)

394

p1rev, p2rev = store.parentrevs(rev)

394

p1rev, p2rev = store.parentrevs(rev)

395

396

if deltaparentfn:

396

if deltaparentfn:

397

deltaparentrev = deltaparentfn(rev)

397

deltaparentrev = deltaparentfn(rev)

398

else:

398

else:

399

deltaparentrev = nullrev

399

deltaparentrev = nullrev

400

401

# Forced delta against previous mode.

401

# Forced delta against previous mode.

402

if deltamode == repository.CG_DELTAMODE_PREV:

402

if deltamode == repository.CG_DELTAMODE_PREV:

403

baserev = prevrev

403

baserev = prevrev

404

405

# We're instructed to send fulltext. Honor that.

405

# We're instructed to send fulltext. Honor that.

406

elif deltamode == repository.CG_DELTAMODE_FULL:

406

elif deltamode == repository.CG_DELTAMODE_FULL:

407

baserev = nullrev

407

baserev = nullrev

408

# We're instructed to use p1. Honor that

408

# We're instructed to use p1. Honor that

409

elif deltamode == repository.CG_DELTAMODE_P1:

409

elif deltamode == repository.CG_DELTAMODE_P1:

410

baserev = p1rev

410

baserev = p1rev

411

412

# There is a delta in storage. We try to use that because it

412

# There is a delta in storage. We try to use that because it

413

# amounts to effectively copying data from storage and is

413

# amounts to effectively copying data from storage and is

414

# therefore the fastest.

414

# therefore the fastest.

415

elif deltaparentrev != nullrev:

415

elif deltaparentrev != nullrev:

416

# Base revision was already emitted in this group. We can

416

# Base revision was already emitted in this group. We can

417

# always safely use the delta.

417

# always safely use the delta.

418

if deltaparentrev in available:

418

if deltaparentrev in available:

419

baserev = deltaparentrev

419

baserev = deltaparentrev

420

421

# Base revision is a parent that hasn't been emitted already.

421

# Base revision is a parent that hasn't been emitted already.

422

# Use it if we can assume the receiver has the parent revision.

422

# Use it if we can assume the receiver has the parent revision.

423

elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):

423

elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):

424

baserev = deltaparentrev

424

baserev = deltaparentrev

425

426

# No guarantee the receiver has the delta parent. Send delta

426

# No guarantee the receiver has the delta parent. Send delta

427

# against last revision (if possible), which in the common case

427

# against last revision (if possible), which in the common case

428

# should be similar enough to this revision that the delta is

428

# should be similar enough to this revision that the delta is

429

# reasonable.

429

# reasonable.

430

elif prevrev is not None:

430

elif prevrev is not None:

431

baserev = prevrev

431

baserev = prevrev

432

else:

432

else:

433

baserev = nullrev

433

baserev = nullrev

434

435

# Storage has a fulltext revision.

435

# Storage has a fulltext revision.

436

437

# Let's use the previous revision, which is as good a guess as any.

437

# Let's use the previous revision, which is as good a guess as any.

438

# There is definitely room to improve this logic.

438

# There is definitely room to improve this logic.

439

elif prevrev is not None:

439

elif prevrev is not None:

440

baserev = prevrev

440

baserev = prevrev

441

else:

441

else:

442

baserev = nullrev

442

baserev = nullrev

443

444

# But we can't actually use our chosen delta base for whatever

444

# But we can't actually use our chosen delta base for whatever

445

# reason. Reset to fulltext.

445

# reason. Reset to fulltext.

446

if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):

446

if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):

447

baserev = nullrev

447

baserev = nullrev

448

449

revision = None

449

revision = None

450

delta = None

450

delta = None

451

baserevisionsize = None

451

baserevisionsize = None

452

453

if revisiondata:

453

if revisiondata:

454

if store.iscensored(baserev) or store.iscensored(rev):

454

if store.iscensored(baserev) or store.iscensored(rev):

455

try:

455

try:

456

revision = store.rawdata(node)

456

revision = store.rawdata(node)

457

except error.CensoredNodeError as e:

457

except error.CensoredNodeError as e:

458

revision = e.tombstone

458

revision = e.tombstone

459

460

if baserev != nullrev:

460

if baserev != nullrev:

461

if rawsizefn:

461

if rawsizefn:

462

baserevisionsize = rawsizefn(baserev)

462

baserevisionsize = rawsizefn(baserev)

463

else:

463

else:

464

baserevisionsize = len(store.rawdata(baserev))

464

baserevisionsize = len(store.rawdata(baserev))

465

466

elif (

466

elif (

467

baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV

467

baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV

468

):

468

):

469

revision = store.rawdata(node)

469

revision = store.rawdata(node)

470

available.add(rev)

470

available.add(rev)

471

else:

471

else:

472

if revdifffn:

472

if revdifffn:

473

delta = revdifffn(baserev, rev)

473

delta = revdifffn(baserev, rev)

474

else:

474

else:

475

delta = mdiff.textdiff(

475

delta = mdiff.textdiff(

476

store.rawdata(baserev), store.rawdata(rev)

476

store.rawdata(baserev), store.rawdata(rev)

477

)

477

)

478

479

available.add(rev)

479

available.add(rev)

480

481

serialized_sidedata = None

481

serialized_sidedata = None

482

sidedata_flags = (0, 0)

482

sidedata_flags = (0, 0)

483

if sidedata_helpers:

483

if sidedata_helpers:

484

try:

484

old_sidedata = store.sidedata(rev)

485

old_sidedata = store.sidedata(rev)

486

except error.CensoredNodeError:

487

# skip any potential sidedata of the censored revision

488

sidedata = {}

489

else:

485

sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(

490

sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(

486

store=store,

491

store=store,

487

sidedata_helpers=sidedata_helpers,

492

sidedata_helpers=sidedata_helpers,

488

sidedata=old_sidedata,

493

sidedata=old_sidedata,

489

rev=rev,

494

rev=rev,

490

)

495

)

491

if sidedata:

496

if sidedata:

492

serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)

497

serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)

493

498

494

flags = flagsfn(rev) if flagsfn else 0

499

flags = flagsfn(rev) if flagsfn else 0

495

protocol_flags = 0

500

protocol_flags = 0

496

if serialized_sidedata:

501

if serialized_sidedata:

497

# Advertise that sidedata exists to the other side

502

# Advertise that sidedata exists to the other side

498

protocol_flags |= CG_FLAG_SIDEDATA

503

protocol_flags |= CG_FLAG_SIDEDATA

499

# Computers and removers can return flags to add and/or remove

504

# Computers and removers can return flags to add and/or remove

500

flags = flags | sidedata_flags[0] & ~sidedata_flags[1]

505

flags = flags | sidedata_flags[0] & ~sidedata_flags[1]

501

506

502

yield resultcls(

507

yield resultcls(

503

node=node,

508

node=node,

504

p1node=fnode(p1rev),

509

p1node=fnode(p1rev),

505

p2node=fnode(p2rev),

510

p2node=fnode(p2rev),

506

basenode=fnode(baserev),

511

basenode=fnode(baserev),

507

flags=flags,

512

flags=flags,

508

baserevisionsize=baserevisionsize,

513

baserevisionsize=baserevisionsize,

509

revision=revision,

514

revision=revision,

510

delta=delta,

515

delta=delta,

511

sidedata=serialized_sidedata,

516

sidedata=serialized_sidedata,

512

protocol_flags=protocol_flags,

517

protocol_flags=protocol_flags,

513

)

518

)

514

519

515

prevrev = rev

520

prevrev = rev

516

521

517

522

518

def deltaiscensored(delta, baserev, baselenfn):

523

def deltaiscensored(delta, baserev, baselenfn):

519

"""Determine if a delta represents censored revision data.

524

"""Determine if a delta represents censored revision data.

520

525

521

``baserev`` is the base revision this delta is encoded against.

526

``baserev`` is the base revision this delta is encoded against.

522

``baselenfn`` is a callable receiving a revision number that resolves the

527

``baselenfn`` is a callable receiving a revision number that resolves the

523

length of the revision fulltext.

528

length of the revision fulltext.

524

529

525

Returns a bool indicating if the result of the delta represents a censored

530

Returns a bool indicating if the result of the delta represents a censored

526

revision.

531

revision.

527

"""

532

"""

528

# Fragile heuristic: unless new file meta keys are added alphabetically

533

# Fragile heuristic: unless new file meta keys are added alphabetically

529

# preceding "censored", all censored revisions are prefixed by

534

# preceding "censored", all censored revisions are prefixed by

530

# "\1\ncensored:". A delta producing such a censored revision must be a

535

# "\1\ncensored:". A delta producing such a censored revision must be a

531

# full-replacement delta, so we inspect the first and only patch in the

536

# full-replacement delta, so we inspect the first and only patch in the

532

# delta for this prefix.

537

# delta for this prefix.

533

hlen = struct.calcsize(b">lll")

538

hlen = struct.calcsize(b">lll")

534

if len(delta) <= hlen:

539

if len(delta) <= hlen:

535

return False

540

return False

536

541

537

oldlen = baselenfn(baserev)

542

oldlen = baselenfn(baserev)

538

newlen = len(delta) - hlen

543

newlen = len(delta) - hlen

539

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

544

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

540

return False

545

return False

541

546

542

add = b"\1\ncensored:"

547

add = b"\1\ncensored:"

543

addlen = len(add)

548

addlen = len(add)

544

return newlen >= addlen and delta[hlen : hlen + addlen] == add

549

return newlen >= addlen and delta[hlen : hlen + addlen] == add

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # storageutil.py - Storage functionality agnostic of backend implementation.
             #
             # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import re
             import struct
             from ..i18n import _
             from ..node import (
                 bin,
                 nullrev,
                 sha1nodeconstants,
             )
             from .. import (
                 dagop,
                 error,
                 mdiff,
                 pycompat,
             )
             from ..interfaces import repository
             from ..revlogutils import sidedata as sidedatamod
             from ..utils import hashutil
             _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
             # revision data contains extra metadata not part of the official digest
             # Only used in changegroup >= v4.
             CG_FLAG_SIDEDATA = 1
             def hashrevisionsha1(text, p1, p2):
                 """Compute the SHA-1 for revision data and its parents.
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 # As of now, if one of the parent node is null, p2 is null
                 if p2 == sha1nodeconstants.nullid:
                     # deep copy of a hash is faster than creating one
                     s = _nullhash.copy()
                     s.update(p1)
                 else:
                     # none of the parent nodes are nullid
                     if p1 < p2:
                         a = p1
                         b = p2
                     else:
                         a = p2
                         b = p1
                     s = hashutil.sha1(a)
                     s.update(b)
                 s.update(text)
                 return s.digest()
             METADATA_RE = re.compile(b'\x01\n')
             def parsemeta(text):
                 """Parse metadata header from revision data.
                 Returns a 2-tuple of (metadata, offset), where both can be None if there
                 is no metadata.
                 """
                 # text can be buffer, so we can't use .startswith or .index
                 if text[:2] != b'\x01\n':
                     return None, None
                 s = METADATA_RE.search(text, 2).start()
                 mtext = text[2:s]
                 meta = {}
                 for l in mtext.splitlines():
                     k, v = l.split(b': ', 1)
                     meta[k] = v
                 return meta, s + 2
             def packmeta(meta, text):
                 """Add metadata to fulltext to produce revision text."""
                 keys = sorted(meta)
                 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
                 return b'\x01\n%s\x01\n%s' % (metatext, text)
             def iscensoredtext(text):
                 meta = parsemeta(text)[0]
                 return meta and b'censored' in meta
             def filtermetadata(text):
                 """Extract just the revision data from source text.
                 Returns ``text`` unless it has a metadata header, in which case we return
                 a new buffer without hte metadata.
                 """
                 if not text.startswith(b'\x01\n'):
                     return text
                 offset = text.index(b'\x01\n', 2)
                 return text[offset + 2 :]
             def filerevisioncopied(store, node):
                 """Resolve file revision copy metadata.
                 Returns ``False`` if the file has no copy metadata. Otherwise a
 -tuple of the source filename and node.
                 """
                 if store.parents(node)[0] != sha1nodeconstants.nullid:
                     return False
                 meta = parsemeta(store.revision(node))[0]
                 # copy and copyrev occur in pairs. In rare cases due to old bugs,
                 # one can occur without the other. So ensure both are present to flag
                 # as a copy.
                 if meta and b'copy' in meta and b'copyrev' in meta:
                     return meta[b'copy'], bin(meta[b'copyrev'])
                 return False
             def filedataequivalent(store, node, filedata):
                 """Determines whether file data is equivalent to a stored node.
                 Returns True if the passed file data would hash to the same value
                 as a stored revision and False otherwise.
                 When a stored revision is censored, filedata must be empty to have
                 equivalence.
                 When a stored revision has copy metadata, it is ignored as part
                 of the compare.
                 """
                 if filedata.startswith(b'\x01\n'):
                     revisiontext = b'\x01\n\x01\n' + filedata
                 else:
                     revisiontext = filedata
                 p1, p2 = store.parents(node)
                 computednode = hashrevisionsha1(revisiontext, p1, p2)
                 if computednode == node:
                     return True
                 # Censored files compare against the empty file.
                 if store.iscensored(store.rev(node)):
                     return filedata == b''
                 # Renaming a file produces a different hash, even if the data
                 # remains unchanged. Check if that's the case.
                 if store.renamed(node):
                     return store.read(node) == filedata
                 return False
             def iterrevs(storelen, start=0, stop=None):
                 """Iterate over revision numbers in a store."""
                 step = 1
                 if stop is not None:
                     if start > stop:
                         step = -1
                     stop += step
                     if stop > storelen:
                         stop = storelen
                 else:
                     stop = storelen
                 return pycompat.xrange(start, stop, step)
             def fileidlookup(store, fileid, identifier):
                 """Resolve the file node for a value.
                 ``store`` is an object implementing the ``ifileindex`` interface.
                 ``fileid`` can be:
                 * A 20 or 32 byte binary node.
                 * An integer revision number
                 * A 40 or 64 byte hex node.
                 * A bytes that can be parsed as an integer representing a revision number.
                 ``identifier`` is used to populate ``error.LookupError`` with an identifier
                 for the store.
                 Raises ``error.LookupError`` on failure.
                 """
                 if isinstance(fileid, int):
                     try:
                         return store.node(fileid)
                     except IndexError:
                         raise error.LookupError(
                             b'%d' % fileid, identifier, _(b'no match found')
                         )
                 if len(fileid) in (20, 32):
                     try:
                         store.rev(fileid)
                         return fileid
                     except error.LookupError:
                         pass
                 if len(fileid) in (40, 64):
                     try:
                         rawnode = bin(fileid)
                         store.rev(rawnode)
                         return rawnode
                     except TypeError:
                         pass
                 try:
                     rev = int(fileid)
                     if b'%d' % rev != fileid:
                         raise ValueError
                     try:
                         return store.node(rev)
                     except (IndexError, TypeError):
                         pass
                 except (ValueError, OverflowError):
                     pass
                 raise error.LookupError(fileid, identifier, _(b'no match found'))
             def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
                 """Resolve information needed to strip revisions.
                 Finds the minimum revision number that must be stripped in order to
                 strip ``minlinkrev``.
                 Returns a 2-tuple of the minimum revision number to do that and a set
                 of all revision numbers that have linkrevs that would be broken
                 by that strip.
                 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
                 ``headrevs`` is an iterable of head revisions.
                 ``linkrevfn`` is a callable that receives a revision and returns a linked
                 revision.
                 ``parentrevsfn`` is a callable that receives a revision number and returns
                 an iterable of its parent revision numbers.
                 """
                 brokenrevs = set()
                 strippoint = tiprev + 1
                 heads = {}
                 futurelargelinkrevs = set()
                 for head in headrevs:
                     headlinkrev = linkrevfn(head)
                     heads[head] = headlinkrev
                     if headlinkrev >= minlinkrev:
                         futurelargelinkrevs.add(headlinkrev)
                 # This algorithm involves walking down the rev graph, starting at the
                 # heads. Since the revs are topologically sorted according to linkrev,
                 # once all head linkrevs are below the minlink, we know there are
                 # no more revs that could have a linkrev greater than minlink.
                 # So we can stop walking.
                 while futurelargelinkrevs:
                     strippoint -= 1
                     linkrev = heads.pop(strippoint)
                     if linkrev < minlinkrev:
                         brokenrevs.add(strippoint)
                     else:
                         futurelargelinkrevs.remove(linkrev)
                     for p in parentrevsfn(strippoint):
                         if p != nullrev:
                             plinkrev = linkrevfn(p)
                             heads[p] = plinkrev
                             if plinkrev >= minlinkrev:
                                 futurelargelinkrevs.add(plinkrev)
                 return strippoint, brokenrevs
             def emitrevisions(
                 store,
                 nodes,
                 nodesorder,
                 resultcls,
                 deltaparentfn=None,
                 candeltafn=None,
                 rawsizefn=None,
                 revdifffn=None,
                 flagsfn=None,
                 deltamode=repository.CG_DELTAMODE_STD,
                 revisiondata=False,
                 assumehaveparentrevisions=False,
                 sidedata_helpers=None,
             ):
                 """Generic implementation of ifiledata.emitrevisions().
                 Emitting revision data is subtly complex. This function attempts to
                 encapsulate all the logic for doing so in a backend-agnostic way.
                 ``store``
                    Object conforming to ``ifilestorage`` interface.
                 ``nodes``
                    List of revision nodes whose data to emit.
                 ``resultcls``
                    A type implementing the ``irevisiondelta`` interface that will be
                    constructed and returned.
                 ``deltaparentfn`` (optional)
                    Callable receiving a revision number and returning the revision number
                    of a revision that the internal delta is stored against. This delta
                    will be preferred over computing a new arbitrary delta.
                    If not defined, a delta will always be computed from raw revision
                    data.
                 ``candeltafn`` (optional)
                    Callable receiving a pair of revision numbers that returns a bool
                    indicating whether a delta between them can be produced.
                    If not defined, it is assumed that any two revisions can delta with
                    each other.
                 ``rawsizefn`` (optional)
                    Callable receiving a revision number and returning the length of the
                    ``store.rawdata(rev)``.
                    If not defined, ``len(store.rawdata(rev))`` will be called.
                 ``revdifffn`` (optional)
                    Callable receiving a pair of revision numbers that returns a delta
                    between them.
                    If not defined, a delta will be computed by invoking mdiff code
                    on ``store.revision()`` results.
                    Defining this function allows a precomputed or stored delta to be
                    used without having to compute on.
                 ``flagsfn`` (optional)
                    Callable receiving a revision number and returns the integer flags
                    value for it. If not defined, flags value will be 0.
                 ``deltamode``
                    constaint on delta to be sent:
                    * CG_DELTAMODE_STD  - normal mode, try to reuse storage deltas,
                    * CG_DELTAMODE_PREV - only delta against "prev",
                    * CG_DELTAMODE_FULL - only issue full snapshot.
                    Whether to send fulltext revisions instead of deltas, if allowed.
                 ``nodesorder``
                 ``revisiondata``
                 ``assumehaveparentrevisions``
                 ``sidedata_helpers`` (optional)
                     If not None, means that sidedata should be included.
                     See `revlogutil.sidedata.get_sidedata_helpers`.
                 """
                 fnode = store.node
                 frev = store.rev
                 if nodesorder == b'nodes':
                     revs = [frev(n) for n in nodes]
                 elif nodesorder == b'linear':
                     revs = {frev(n) for n in nodes}
                     revs = dagop.linearize(revs, store.parentrevs)
                 else:  # storage and default
                     revs = sorted(frev(n) for n in nodes)
                 prevrev = None
                 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
                     prevrev = store.parentrevs(revs[0])[0]
                 # Set of revs available to delta against.
                 available = set()
                 for rev in revs:
                     if rev == nullrev:
                         continue
                     node = fnode(rev)
                     p1rev, p2rev = store.parentrevs(rev)
                     if deltaparentfn:
                         deltaparentrev = deltaparentfn(rev)
                     else:
                         deltaparentrev = nullrev
                     # Forced delta against previous mode.
                     if deltamode == repository.CG_DELTAMODE_PREV:
                         baserev = prevrev
                     # We're instructed to send fulltext. Honor that.
                     elif deltamode == repository.CG_DELTAMODE_FULL:
                         baserev = nullrev
                     # We're instructed to use p1. Honor that
                     elif deltamode == repository.CG_DELTAMODE_P1:
                         baserev = p1rev
                     # There is a delta in storage. We try to use that because it
                     # amounts to effectively copying data from storage and is
                     # therefore the fastest.
                     elif deltaparentrev != nullrev:
                         # Base revision was already emitted in this group. We can
                         # always safely use the delta.
                         if deltaparentrev in available:
                             baserev = deltaparentrev
                         # Base revision is a parent that hasn't been emitted already.
                         # Use it if we can assume the receiver has the parent revision.
                         elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
                             baserev = deltaparentrev
                         # No guarantee the receiver has the delta parent. Send delta
                         # against last revision (if possible), which in the common case
                         # should be similar enough to this revision that the delta is
                         # reasonable.
                         elif prevrev is not None:
                             baserev = prevrev
                         else:
                             baserev = nullrev
                     # Storage has a fulltext revision.
                     # Let's use the previous revision, which is as good a guess as any.
                     # There is definitely room to improve this logic.
                     elif prevrev is not None:
                         baserev = prevrev
                     else:
                         baserev = nullrev
                     # But we can't actually use our chosen delta base for whatever
                     # reason. Reset to fulltext.
                     if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
                         baserev = nullrev
                     revision = None
                     delta = None
                     baserevisionsize = None
                     if revisiondata:
                         if store.iscensored(baserev) or store.iscensored(rev):
                             try:
                                 revision = store.rawdata(node)
                             except error.CensoredNodeError as e:
                                 revision = e.tombstone
                             if baserev != nullrev:
                                 if rawsizefn:
                                     baserevisionsize = rawsizefn(baserev)
                                 else:
                                     baserevisionsize = len(store.rawdata(baserev))
                         elif (
                             baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
                         ):
                             revision = store.rawdata(node)
                             available.add(rev)
                         else:
                             if revdifffn:
                                 delta = revdifffn(baserev, rev)
                             else:
                                 delta = mdiff.textdiff(
                                     store.rawdata(baserev), store.rawdata(rev)
                                 )
                             available.add(rev)
                     serialized_sidedata = None
                     sidedata_flags = (0, 0)
                     if sidedata_helpers:
+                        try:
                             old_sidedata = store.sidedata(rev)
+                        except error.CensoredNodeError:
+                            # skip any potential sidedata of the censored revision
+                            sidedata = {}
+                        else:
                             sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
                                 store=store,
                                 sidedata_helpers=sidedata_helpers,
                                 sidedata=old_sidedata,
                                 rev=rev,
                             )
                         if sidedata:
                             serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
                     flags = flagsfn(rev) if flagsfn else 0
                     protocol_flags = 0
                     if serialized_sidedata:
                         # Advertise that sidedata exists to the other side
                         protocol_flags |= CG_FLAG_SIDEDATA
                         # Computers and removers can return flags to add and/or remove
                         flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
                     yield resultcls(
                         node=node,
                         p1node=fnode(p1rev),
                         p2node=fnode(p2rev),
                         basenode=fnode(baserev),
                         flags=flags,
                         baserevisionsize=baserevisionsize,
                         revision=revision,
                         delta=delta,
                         sidedata=serialized_sidedata,
                         protocol_flags=protocol_flags,
                     )
                     prevrev = rev
             def deltaiscensored(delta, baserev, baselenfn):
                 """Determine if a delta represents censored revision data.
                 ``baserev`` is the base revision this delta is encoded against.
                 ``baselenfn`` is a callable receiving a revision number that resolves the
                 length of the revision fulltext.
                 Returns a bool indicating if the result of the delta represents a censored
                 revision.
                 """
                 # Fragile heuristic: unless new file meta keys are added alphabetically
                 # preceding "censored", all censored revisions are prefixed by
                 # "\1\ncensored:". A delta producing such a censored revision must be a
                 # full-replacement delta, so we inspect the first and only patch in the
                 # delta for this prefix.
                 hlen = struct.calcsize(b">lll")
                 if len(delta) <= hlen:
                     return False
                 oldlen = baselenfn(baserev)
                 newlen = len(delta) - hlen
                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                     return False
                 add = b"\1\ncensored:"
                 addlen = len(add)
                 return newlen >= addlen and delta[hlen : hlen + addlen] == add