upstream/mercurial-mirror Commit - r43026:2119647f

1

# storageutil.py - Storage functionality agnostic of backend implementation.

1

# storageutil.py - Storage functionality agnostic of backend implementation.

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import hashlib

10

import hashlib

11

import re

11

import re

12

import struct

12

import struct

13

14

from ..i18n import _

14

from ..i18n import _

15

from ..node import (

15

from ..node import (

16

bin,

16

bin,

17

nullid,

17

nullid,

18

nullrev,

18

nullrev,

19

)

19

)

20

from .. import (

20

from .. import (

21

dagop,

21

dagop,

22

error,

22

error,

23

mdiff,

23

mdiff,

24

pycompat,

24

pycompat,

25

repository,

25

repository,

26

)

26

)

27

28

# Pre-seeded SHA-1 state containing ``nullid``; copied for the common case
# where the second parent is null.
_nullhash = hashlib.sha1(nullid)

def hashrevisionsha1(text, p1, p2):
    """Compute the SHA-1 for revision data and its parents.

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    ``text`` is the revision data and ``p1``/``p2`` are the binary parent
    nodes. Returns the binary digest.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid; feed them in sorted order so
        # the result does not depend on which parent is p1 and which is p2.
        if p1 < p2:
            a, b = p1, p2
        else:
            a, b = p2, p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

54

55

# Marker that both opens and closes the metadata header of a revision.
METADATA_RE = re.compile(b'\x01\n')

def parsemeta(text):
    """Parse metadata header from revision data.

    Returns a 2-tuple of (metadata, offset), where both can be None if there
    is no metadata. ``metadata`` is a dict mapping keys to values and
    ``offset`` is the index of the first byte following the header.
    """
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != b'\x01\n':
        return None, None
    # Locate the closing marker, skipping the opening one.
    s = METADATA_RE.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for line in mtext.splitlines():
        # Each header line has the form ``key: value``.
        k, v = line.split(b': ', 1)
        meta[k] = v
    return meta, s + 2

73

74

def packmeta(meta, text):
    """Add metadata to fulltext to produce revision text.

    ``meta`` is a dict of bytes keys to bytes values. Entries are emitted
    as ``key: value`` lines in sorted key order, wrapped between two
    ``\\x01\\n`` markers and followed by ``text``.
    """
    keys = sorted(meta)
    metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
    return b'\x01\n%s\x01\n%s' % (metatext, text)

79

80

def iscensoredtext(text):
    """Report whether revision text carries a ``censored`` metadata key.

    The result is meant to be used for its truthiness: it is ``True`` when
    the metadata header contains ``censored``, and a falsy value (``None``,
    an empty dict or ``False``) otherwise.
    """
    meta = parsemeta(text)[0]
    return meta and b'censored' in meta

83

84

def filtermetadata(text):
    """Extract just the revision data from source text.

    Returns ``text`` unless it has a metadata header, in which case we return
    a new buffer without the metadata.
    """
    if not text.startswith(b'\x01\n'):
        return text

    # Find the closing marker, skipping the opening one at offset 0.
    offset = text.index(b'\x01\n', 2)
    return text[offset + 2:]

95

96

def filerevisioncopied(store, node):
    """Resolve file revision copy metadata.

    Returns ``False`` if the file has no copy metadata. Otherwise a
    2-tuple of the source filename and node.
    """
    # Only revisions whose first parent is null are checked for copy
    # metadata.
    if store.parents(node)[0] != nullid:
        return False

    meta = parsemeta(store.revision(node))[0]

    # copy and copyrev occur in pairs. In rare cases due to old bugs,
    # one can occur without the other. So ensure both are present to flag
    # as a copy.
    if meta and b'copy' in meta and b'copyrev' in meta:
        return meta[b'copy'], bin(meta[b'copyrev'])

    return False

114

115

def filedataequivalent(store, node, filedata):
    """Determines whether file data is equivalent to a stored node.

    Returns True if the passed file data would hash to the same value
    as a stored revision and False otherwise.

    When a stored revision is censored, filedata must be empty to have
    equivalence.

    When a stored revision has copy metadata, it is ignored as part
    of the compare.
    """

    # Data that itself begins with the metadata marker is hashed with an
    # empty metadata header prepended.
    if filedata.startswith(b'\x01\n'):
        revisiontext = b'\x01\n\x01\n' + filedata
    else:
        revisiontext = filedata

    p1, p2 = store.parents(node)

    computednode = hashrevisionsha1(revisiontext, p1, p2)

    if computednode == node:
        return True

    # Censored files compare against the empty file.
    if store.iscensored(store.rev(node)):
        return filedata == b''

    # Renaming a file produces a different hash, even if the data
    # remains unchanged. Check if that's the case.
    if store.renamed(node):
        return store.read(node) == filedata

    return False

150

151

def iterrevs(storelen, start=0, stop=None):
    """Iterate over revision numbers in a store.

    ``storelen`` is the number of revisions in the store. Iteration begins
    at ``start``. When ``stop`` is given it is included in the range (unless
    that would go past ``storelen``), and the iteration runs backwards if
    ``start`` is greater than ``stop``. Without ``stop``, iteration runs up
    to the end of the store.
    """
    step = 1

    if stop is not None:
        if start > stop:
            step = -1
        # Make ``stop`` inclusive in the direction of travel.
        stop += step
        if stop > storelen:
            stop = storelen
    else:
        stop = storelen

    return pycompat.xrange(start, stop, step)

165

166

def fileidlookup(store, fileid, identifier):
    """Resolve the file node for a value.

    ``store`` is an object implementing the ``ifileindex`` interface.

    ``fileid`` can be:

    * A 20 byte binary node.
    * An integer revision number
    * A 40 byte hex node.
    * A bytes that can be parsed as an integer representing a revision number.

    ``identifier`` is used to populate ``error.LookupError`` with an identifier
    for the store.

    Raises ``error.LookupError`` on failure.
    """
    if isinstance(fileid, int):
        try:
            return store.node(fileid)
        except IndexError:
            raise error.LookupError('%d' % fileid, identifier,
                                    _('no match found'))

    # Candidate binary node: accept it only if the store knows it.
    if len(fileid) == 20:
        try:
            store.rev(fileid)
            return fileid
        except error.LookupError:
            pass

    # Candidate hex node: decode it and check that the store knows it.
    if len(fileid) == 40:
        try:
            rawnode = bin(fileid)
            store.rev(rawnode)
            return rawnode
        except TypeError:
            pass

    # Last resort: bytes holding a decimal revision number.
    try:
        rev = int(fileid)

        # Reject values that do not round-trip exactly through int().
        if b'%d' % rev != fileid:
            raise ValueError

        try:
            return store.node(rev)
        except (IndexError, TypeError):
            pass
    except (ValueError, OverflowError):
        pass

    raise error.LookupError(fileid, identifier, _('no match found'))

219

220

def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
    """Resolve information needed to strip revisions.

    Finds the minimum revision number that must be stripped in order to
    strip ``minlinkrev``.

    Returns a 2-tuple of the minimum revision number to do that and a set
    of all revision numbers that have linkrevs that would be broken
    by that strip.

    ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
    ``headrevs`` is an iterable of head revisions.
    ``linkrevfn`` is a callable that receives a revision and returns a linked
    revision.
    ``parentrevsfn`` is a callable that receives a revision number and returns
    an iterable of its parent revision numbers.
    """
    brokenrevs = set()
    strippoint = tiprev + 1

    # Maps each revision on the current walk frontier to its linkrev.
    heads = {}
    # Linkrevs on the frontier that are at or above ``minlinkrev``.
    futurelargelinkrevs = set()
    for head in headrevs:
        headlinkrev = linkrevfn(head)
        heads[head] = headlinkrev
        if headlinkrev >= minlinkrev:
            futurelargelinkrevs.add(headlinkrev)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)

        if linkrev < minlinkrev:
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)

        # Replace the visited revision with its non-null parents.
        for p in parentrevsfn(strippoint):
            if p != nullrev:
                plinkrev = linkrevfn(p)
                heads[p] = plinkrev
                if plinkrev >= minlinkrev:
                    futurelargelinkrevs.add(plinkrev)

    return strippoint, brokenrevs

270

271

def emitrevisions(store, nodes, nodesorder, resultcls, deltaparentfn=None,
                  candeltafn=None, rawsizefn=None, revdifffn=None, flagsfn=None,
                  deltamode=repository.CG_DELTAMODE_STD,
                  revisiondata=False, assumehaveparentrevisions=False):
    """Generic implementation of ifiledata.emitrevisions().

    Emitting revision data is subtly complex. This function attempts to
    encapsulate all the logic for doing so in a backend-agnostic way.

    ``store``
       Object conforming to ``ifilestorage`` interface.

    ``nodes``
       List of revision nodes whose data to emit.

    ``nodesorder``
       Order in which revisions are emitted. ``'nodes'`` keeps the order of
       ``nodes``, ``'linear'`` orders them with ``dagop.linearize()`` and any
       other value sorts them by revision number.

    ``resultcls``
       A type implementing the ``irevisiondelta`` interface that will be
       constructed and returned.

    ``deltaparentfn`` (optional)
       Callable receiving a revision number and returning the revision number
       of a revision that the internal delta is stored against. This delta
       will be preferred over computing a new arbitrary delta.

       If not defined, a delta will always be computed from raw revision
       data.

    ``candeltafn`` (optional)
       Callable receiving a pair of revision numbers that returns a bool
       indicating whether a delta between them can be produced.

       If not defined, it is assumed that any two revisions can delta with
       each other.

    ``rawsizefn`` (optional)
       Callable receiving a revision number and returning the length of the
       ``store.rawdata(rev)``.

       If not defined, ``len(store.rawdata(rev))`` will be called.

    ``revdifffn`` (optional)
       Callable receiving a pair of revision numbers that returns a delta
       between them.

       If not defined, a delta will be computed by invoking mdiff code
       on ``store.rawdata()`` results.

       Defining this function allows a precomputed or stored delta to be
       used without having to compute one.

    ``flagsfn`` (optional)
       Callable receiving a revision number and returns the integer flags
       value for it. If not defined, flags value will be 0.

    ``deltamode``
       constraint on delta to be sent:
       * CG_DELTAMODE_STD  - normal mode, try to reuse storage deltas,
       * CG_DELTAMODE_PREV - only delta against "prev",
       * CG_DELTAMODE_FULL - only issue full snapshot,
       * CG_DELTAMODE_P1   - only delta against the first parent.

    ``revisiondata``
       Whether to populate the ``revision`` or ``delta`` field of emitted
       objects. When false, both are left as ``None``.

    ``assumehaveparentrevisions``
       Whether the receiver can be assumed to have the parents of emitted
       revisions, which allows deltas against parents that were not emitted
       here.
    """

    fnode = store.node
    frev = store.rev

    if nodesorder == 'nodes':
        revs = [frev(n) for n in nodes]
    elif nodesorder == 'linear':
        revs = set(frev(n) for n in nodes)
        revs = dagop.linearize(revs, store.parentrevs)
    else: # storage and default
        revs = sorted(frev(n) for n in nodes)

    prevrev = None

    if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
        prevrev = store.parentrevs(revs[0])[0]

    # Set of revs available to delta against.
    available = set()

    for rev in revs:
        if rev == nullrev:
            continue

        node = fnode(rev)
        p1rev, p2rev = store.parentrevs(rev)

        if deltaparentfn:
            deltaparentrev = deltaparentfn(rev)
        else:
            deltaparentrev = nullrev

        # Forced delta against previous mode.
        if deltamode == repository.CG_DELTAMODE_PREV:
            baserev = prevrev

        # We're instructed to send fulltext. Honor that.
        elif deltamode == repository.CG_DELTAMODE_FULL:
            baserev = nullrev
        # We're instructed to use p1. Honor that
        elif deltamode == repository.CG_DELTAMODE_P1:
            baserev = p1rev

        # There is a delta in storage. We try to use that because it
        # amounts to effectively copying data from storage and is
        # therefore the fastest.
        elif deltaparentrev != nullrev:
            # Base revision was already emitted in this group. We can
            # always safely use the delta.
            if deltaparentrev in available:
                baserev = deltaparentrev

            # Base revision is a parent that hasn't been emitted already.
            # Use it if we can assume the receiver has the parent revision.
            elif (assumehaveparentrevisions
                  and deltaparentrev in (p1rev, p2rev)):
                baserev = deltaparentrev

            # No guarantee the receiver has the delta parent. Send delta
            # against last revision (if possible), which in the common case
            # should be similar enough to this revision that the delta is
            # reasonable.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

        # Storage has a fulltext revision.

        # Let's use the previous revision, which is as good a guess as any.
        # There is definitely room to improve this logic.
        elif prevrev is not None:
            baserev = prevrev
        else:
            baserev = nullrev

        # But we can't actually use our chosen delta base for whatever
        # reason. Reset to fulltext.
        if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
            baserev = nullrev

        revision = None
        delta = None
        baserevisionsize = None

        if revisiondata:
            if store.iscensored(baserev) or store.iscensored(rev):
                # A censored revision on either side: send the raw data
                # (or the tombstone) rather than a delta.
                try:
                    revision = store.rawdata(node)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    if rawsizefn:
                        baserevisionsize = rawsizefn(baserev)
                    else:
                        baserevisionsize = len(store.rawdata(baserev))

            elif (baserev == nullrev
                  and deltamode != repository.CG_DELTAMODE_PREV):
                revision = store.rawdata(node)
                available.add(rev)
            else:
                if revdifffn:
                    delta = revdifffn(baserev, rev)
                else:
                    delta = mdiff.textdiff(store.rawdata(baserev),
                                           store.rawdata(rev))

                available.add(rev)

        yield resultcls(
            node=node,
            p1node=fnode(p1rev),
            p2node=fnode(p2rev),
            basenode=fnode(baserev),
            flags=flagsfn(rev) if flagsfn else 0,
            baserevisionsize=baserevisionsize,
            revision=revision,
            delta=delta)

        prevrev = rev

460

459

461

def deltaiscensored(delta, baserev, baselenfn):

460

def deltaiscensored(delta, baserev, baselenfn):

462

"""Determine if a delta represents censored revision data.

461

"""Determine if a delta represents censored revision data.

463

462

464

``baserev`` is the base revision this delta is encoded against.

463

``baserev`` is the base revision this delta is encoded against.

465

``baselenfn`` is a callable receiving a revision number that resolves the

464

``baselenfn`` is a callable receiving a revision number that resolves the

466

length of the revision fulltext.

465

length of the revision fulltext.

467

466

468

Returns a bool indicating if the result of the delta represents a censored

467

Returns a bool indicating if the result of the delta represents a censored

469

revision.

468

revision.

470

"""

469

"""

471

# Fragile heuristic: unless new file meta keys are added alphabetically

470

# Fragile heuristic: unless new file meta keys are added alphabetically

472

# preceding "censored", all censored revisions are prefixed by

471

# preceding "censored", all censored revisions are prefixed by

473

# "\1\ncensored:". A delta producing such a censored revision must be a

472

# "\1\ncensored:". A delta producing such a censored revision must be a

474

# full-replacement delta, so we inspect the first and only patch in the

473

# full-replacement delta, so we inspect the first and only patch in the

475

# delta for this prefix.

474

# delta for this prefix.

476

hlen = struct.calcsize(">lll")

475

hlen = struct.calcsize(">lll")

477

if len(delta) <= hlen:

476

if len(delta) <= hlen:

478

return False

477

return False

479

478

480

oldlen = baselenfn(baserev)

479

oldlen = baselenfn(baserev)

481

newlen = len(delta) - hlen

480

newlen = len(delta) - hlen

482

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

481

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

483

return False

482

return False

484

483

485

add = "\1\ncensored:"

484

add = "\1\ncensored:"

486

addlen = len(add)

485

addlen = len(add)

487

return newlen >= addlen and delta[hlen:hlen + addlen] == add

486

return newlen >= addlen and delta[hlen:hlen + addlen] == add

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # storageutil.py - Storage functionality agnostic of backend implementation.
             #
             # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import hashlib
             import re
             import struct
             from ..i18n import _
             from ..node import (
                 bin,
                 nullid,
                 nullrev,
             )
             from .. import (
                 dagop,
                 error,
                 mdiff,
                 pycompat,
                 repository,
             )
             _nullhash = hashlib.sha1(nullid)
             def hashrevisionsha1(text, p1, p2):
                 """Return the SHA-1 digest identifying a revision.

                 The digest covers both parent nodes and the revision ``text``, so
                 revisions with the same content can still be told apart in the
                 revision graph.
                 """
                 if p2 == nullid:
                     # As of now, when one parent is null it is always p2. Copying
                     # the precomputed hash of nullid is faster than creating one.
                     hasher = _nullhash.copy()
                     hasher.update(p1)
                 else:
                     # Neither parent is null: feed the smaller node first so the
                     # result does not depend on the order of the parents.
                     first, second = (p1, p2) if p1 < p2 else (p2, p1)
                     hasher = hashlib.sha1(first)
                     hasher.update(second)
                 hasher.update(text)
                 return hasher.digest()
             # Marker delimiting the metadata header at the start of revision data.
             METADATA_RE = re.compile(b'\x01\n')
             def parsemeta(text):
                 """Split the metadata header off revision data.

                 Returns ``(metadata, offset)`` where ``metadata`` maps header keys to
                 values and ``offset`` is the index at which the data following the
                 header begins. Both are ``None`` when ``text`` does not start with a
                 metadata header.
                 """
                 # ``text`` can be a buffer, so compare a slice instead of relying
                 # on .startswith or .index.
                 if text[:2] != b'\x01\n':
                     return None, None
                 end = METADATA_RE.search(text, 2).start()
                 # Each header line has the form ``key: value``.
                 meta = dict(line.split(b': ', 1)
                             for line in text[2:end].splitlines())
                 return meta, end + 2
             def packmeta(meta, text):
                 """Prefix ``text`` with a metadata header built from ``meta``.

                 Keys are written in sorted order, one ``key: value`` line each, and
                 the header is wrapped in ``\\x01\\n`` markers.
                 """
                 header = [b'%s: %s\n' % (key, meta[key]) for key in sorted(meta)]
                 return b'\x01\n%s\x01\n%s' % (b''.join(header), text)
             def iscensoredtext(text):
                 """Report whether revision ``text`` carries a ``censored`` metadata key.

                 The result is falsy (``None`` or an empty dict, exactly as returned by
                 ``parsemeta()``) when there is no metadata, otherwise a bool.
                 """
                 meta = parsemeta(text)[0]
                 if not meta:
                     return meta
                 return b'censored' in meta
             def filtermetadata(text):
                 """Return the revision data of ``text`` with any metadata header removed.

                 ``text`` is returned unchanged when it does not start with a metadata
                 header; otherwise a new buffer without the metadata is returned.
                 """
                 marker = b'\x01\n'
                 if text.startswith(marker):
                     # Skip past the closing marker that ends the header.
                     return text[text.index(marker, 2) + 2:]
                 return text
             def filerevisioncopied(store, node):
                 """Look up the copy metadata of a file revision.

                 Returns ``False`` if the file has no copy metadata. Otherwise a
                 2-tuple of the source filename and node.
                 """
                 # A revision whose first parent is not null is never reported as
                 # a copy.
                 if store.parents(node)[0] != nullid:
                     return False
                 meta = parsemeta(store.revision(node))[0]
                 # copy and copyrev occur in pairs. In rare cases due to old bugs,
                 # one can occur without the other, so both must be present for the
                 # revision to count as a copy.
                 if not meta or b'copy' not in meta or b'copyrev' not in meta:
                     return False
                 return meta[b'copy'], bin(meta[b'copyrev'])
             def filedataequivalent(store, node, filedata):
                 """Tell whether ``filedata`` is equivalent to the stored revision ``node``.

                 Returns True if the passed file data would hash to the same value
                 as the stored revision and False otherwise.

                 When the stored revision is censored, ``filedata`` must be empty to be
                 equivalent. When the stored revision has copy metadata, the metadata
                 is ignored for the comparison.
                 """
                 revisiontext = filedata
                 if filedata.startswith(b'\x01\n'):
                     # Data that itself begins with the metadata marker is hashed
                     # with an empty metadata header in front of it.
                     revisiontext = b'\x01\n\x01\n' + filedata
                 p1, p2 = store.parents(node)
                 if hashrevisionsha1(revisiontext, p1, p2) == node:
                     return True
                 # Censored files compare against the empty file.
                 if store.iscensored(store.rev(node)):
                     return filedata == b''
                 # Renaming a file produces a different hash even if the data is
                 # unchanged, so compare the stored content directly in that case.
                 if store.renamed(node):
                     return store.read(node) == filedata
                 return False
             def iterrevs(storelen, start=0, stop=None):
                 """Return an iterable of revision numbers in a store.

                 Without ``stop`` the range runs from ``start`` up to ``storelen``.
                 With ``stop`` the range includes ``stop`` itself, counts downwards
                 when ``start > stop``, and is capped at ``storelen``.
                 """
                 if stop is None:
                     return pycompat.xrange(start, storelen, 1)
                 step = -1 if start > stop else 1
                 # Move the end one step further so that ``stop`` is included, but
                 # never past the end of the store.
                 stop = min(stop + step, storelen)
                 return pycompat.xrange(start, stop, step)
             def fileidlookup(store, fileid, identifier):
                 """Resolve the file node for a value.
                 ``store`` is an object implementing the ``ifileindex`` interface.
                 ``fileid`` can be:
                 * A 20 byte binary node.
                 * An integer revision number
                 * A 40 byte hex node.
                 * A bytes that can be parsed as an integer representing a revision number.
                 ``identifier`` is used to populate ``error.LookupError`` with an identifier
                 for the store.
                 Returns the binary node. The interpretations are tried in the order
                 integer, 20 byte node, 40 byte hex node, decimal revision number.
                 Raises ``error.LookupError`` on failure.
                 """
                 # Integer revision number: an out-of-range revision is reported as
                 # a lookup failure.
                 if isinstance(fileid, int):
                     try:
                         return store.node(fileid)
                     except IndexError:
                         raise error.LookupError('%d' % fileid, identifier,
                                                 _('no match found'))
                 # Candidate binary node: accepted only if the store knows it,
                 # otherwise fall through to the remaining interpretations.
                 if len(fileid) == 20:
                     try:
                         store.rev(fileid)
                         return fileid
                     except error.LookupError:
                         pass
                 # Candidate hex node. Only a TypeError is swallowed here; an
                 # error.LookupError raised by store.rev() for an unknown node is
                 # not caught and propagates to the caller.
                 if len(fileid) == 40:
                     try:
                         rawnode = bin(fileid)
                         store.rev(rawnode)
                         return rawnode
                     except TypeError:
                         pass
                 # Candidate decimal revision number. The value must round-trip
                 # exactly through int(), so non-canonical spellings are rejected.
                 # Note that the outer handler also covers ValueError/OverflowError
                 # raised by store.node().
                 try:
                     rev = int(fileid)
                     if b'%d' % rev != fileid:
                         raise ValueError
                     try:
                         return store.node(rev)
                     except (IndexError, TypeError):
                         pass
                 except (ValueError, OverflowError):
                     pass
                 # Nothing matched.
                 raise error.LookupError(fileid, identifier, _('no match found'))
             def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
                 """Work out what has to be stripped to get rid of ``minlinkrev``.

                 Returns a 2-tuple of the minimum revision number that must be stripped
                 in order to strip ``minlinkrev`` and the set of revision numbers whose
                 linkrevs would be broken by that strip.

                 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.

                 ``headrevs`` is an iterable of head revisions.

                 ``linkrevfn`` is a callable that receives a revision and returns a
                 linked revision.

                 ``parentrevsfn`` is a callable that receives a revision number and
                 returns an iterable of its parent revision numbers.
                 """
                 broken = set()
                 # Linkrevs >= minlinkrev belonging to revisions still to be visited.
                 pending = set()
                 # Maps each revision still to be visited to its linkrev.
                 frontier = {}
                 def track(rev):
                     linkrev = linkrevfn(rev)
                     frontier[rev] = linkrev
                     if linkrev >= minlinkrev:
                         pending.add(linkrev)
                 for head in headrevs:
                     track(head)
                 # Walk down the rev graph starting at the heads. Since the revs are
                 # topologically sorted according to linkrev, once every tracked
                 # linkrev is below minlinkrev no remaining rev can have a linkrev
                 # greater than it, so the walk can stop.
                 cursor = tiprev + 1
                 while pending:
                     cursor -= 1
                     linkrev = frontier.pop(cursor)
                     if linkrev >= minlinkrev:
                         pending.remove(linkrev)
                     else:
                         broken.add(cursor)
                     for parent in parentrevsfn(cursor):
                         if parent == nullrev:
                             continue
                         track(parent)
                 return cursor, broken
             def emitrevisions(store, nodes, nodesorder, resultcls, deltaparentfn=None,
                               candeltafn=None, rawsizefn=None, revdifffn=None, flagsfn=None,
                               deltamode=repository.CG_DELTAMODE_STD,
                               revisiondata=False, assumehaveparentrevisions=False):
                 """Generic implementation of ifiledata.emitrevisions().

                 Emitting revision data is subtly complex. This function attempts to
                 encapsulate all the logic for doing so in a backend-agnostic way.

                 This is a generator yielding one ``resultcls`` instance per emitted
                 revision. Null revisions are skipped.

                 ``store``
                    Object conforming to ``ifilestorage`` interface.

                 ``nodes``
                    List of revision nodes whose data to emit.

                 ``resultcls``
                    A type implementing the ``irevisiondelta`` interface that will be
                    constructed and returned.

                 ``deltaparentfn`` (optional)
                    Callable receiving a revision number and returning the revision number
                    of a revision that the internal delta is stored against. This delta
                    will be preferred over computing a new arbitrary delta.

                    If not defined, a delta will always be computed from raw revision
                    data.

                 ``candeltafn`` (optional)
                    Callable receiving a pair of revision numbers that returns a bool
                    indicating whether a delta between them can be produced.

                    If not defined, it is assumed that any two revisions can delta with
                    each other.

                 ``rawsizefn`` (optional)
                    Callable receiving a revision number and returning the length of the
                    ``store.rawdata(rev)``.

                    If not defined, ``len(store.rawdata(rev))`` will be called.

                 ``revdifffn`` (optional)
                    Callable receiving a pair of revision numbers that returns a delta
                    between them.

                    If not defined, a delta will be computed by invoking mdiff code
                    on ``store.rawdata()`` results.

                    Defining this function allows a precomputed or stored delta to be
                    used without having to compute one.

                 ``flagsfn`` (optional)
                    Callable receiving a revision number and returns the integer flags
                    value for it. If not defined, flags value will be 0.

                 ``deltamode``
                    Constraint on the delta to be sent:

                    * CG_DELTAMODE_STD  - normal mode, try to reuse storage deltas,
                    * CG_DELTAMODE_PREV - only delta against "prev",
                    * CG_DELTAMODE_P1   - only delta against p1,
                    * CG_DELTAMODE_FULL - only issue full snapshot.

                 ``nodesorder``
                    Order in which revisions are emitted: ``'nodes'`` keeps the order of
                    ``nodes``, ``'linear'`` orders them with ``dagop.linearize()``, and
                    any other value (storage order, the default) sorts by revision
                    number.

                 ``revisiondata``
                    When true, each result carries either the raw revision text or a
                    delta against its base. When false, the ``revision``, ``delta`` and
                    ``baserevisionsize`` fields are left as ``None``.

                 ``assumehaveparentrevisions``
                    When true, the receiver is assumed to have the parents of emitted
                    revisions: the first parent of the first revision seeds the
                    "previous" revision, and a stored delta parent that is one of the
                    revision's parents may be used as delta base even if it was not
                    emitted in this group.
                 """
                 fnode = store.node
                 frev = store.rev
                 if nodesorder == 'nodes':
                     revs = [frev(n) for n in nodes]
                 elif nodesorder == 'linear':
                     revs = set(frev(n) for n in nodes)
                     revs = dagop.linearize(revs, store.parentrevs)
                 else: # storage and default
                     revs = sorted(frev(n) for n in nodes)
                 prevrev = None
                 # Seed "prev" with p1 of the first revision. Guard against an empty
                 # revision list, for which there is nothing to emit.
                 if revs and (deltamode == repository.CG_DELTAMODE_PREV
                              or assumehaveparentrevisions):
                     prevrev = store.parentrevs(revs[0])[0]
                 # Set of revs available to delta against.
                 available = set()
                 for rev in revs:
                     if rev == nullrev:
                         continue
                     node = fnode(rev)
                     p1rev, p2rev = store.parentrevs(rev)
                     if deltaparentfn:
                         deltaparentrev = deltaparentfn(rev)
                     else:
                         deltaparentrev = nullrev
                     # Forced delta against previous mode.
                     if deltamode == repository.CG_DELTAMODE_PREV:
                         baserev = prevrev
                     # We're instructed to send fulltext. Honor that.
                     elif deltamode == repository.CG_DELTAMODE_FULL:
                         baserev = nullrev
                     # We're instructed to use p1. Honor that
                     elif deltamode == repository.CG_DELTAMODE_P1:
                         baserev = p1rev
                     # There is a delta in storage. We try to use that because it
                     # amounts to effectively copying data from storage and is
                     # therefore the fastest.
                     elif deltaparentrev != nullrev:
                         # Base revision was already emitted in this group. We can
                         # always safely use the delta.
                         if deltaparentrev in available:
                             baserev = deltaparentrev
                         # Base revision is a parent that hasn't been emitted already.
                         # Use it if we can assume the receiver has the parent revision.
                         elif (assumehaveparentrevisions
                               and deltaparentrev in (p1rev, p2rev)):
                             baserev = deltaparentrev
                         # No guarantee the receiver has the delta parent. Send delta
                         # against last revision (if possible), which in the common case
                         # should be similar enough to this revision that the delta is
                         # reasonable.
                         elif prevrev is not None:
                             baserev = prevrev
                         else:
                             baserev = nullrev
                     # Storage has a fulltext revision.
                     # Let's use the previous revision, which is as good a guess as any.
                     # There is definitely room to improve this logic.
                     elif prevrev is not None:
                         baserev = prevrev
                     else:
                         baserev = nullrev
                     # But we can't actually use our chosen delta base for whatever
                     # reason. Reset to fulltext.
                     if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
                         baserev = nullrev
                     revision = None
                     delta = None
                     baserevisionsize = None
                     if revisiondata:
                         # When either side is censored, send the raw text (or the
                         # tombstone carried by the error) instead of a delta, along
                         # with the size of the base the receiver will replace.
                         if store.iscensored(baserev) or store.iscensored(rev):
                             try:
                                 revision = store.rawdata(node)
                             except error.CensoredNodeError as e:
                                 revision = e.tombstone
                             if baserev != nullrev:
                                 if rawsizefn:
                                     baserevisionsize = rawsizefn(baserev)
                                 else:
                                     baserevisionsize = len(store.rawdata(baserev))
                         elif (baserev == nullrev
                                 and deltamode != repository.CG_DELTAMODE_PREV):
                             revision = store.rawdata(node)
                             available.add(rev)
                         else:
                             if revdifffn:
                                 delta = revdifffn(baserev, rev)
                             else:
                                 delta = mdiff.textdiff(store.rawdata(baserev),
                                                        store.rawdata(rev))
                             available.add(rev)
                     yield resultcls(
                         node=node,
                         p1node=fnode(p1rev),
                         p2node=fnode(p2rev),
                         basenode=fnode(baserev),
                         flags=flagsfn(rev) if flagsfn else 0,
                         baserevisionsize=baserevisionsize,
                         revision=revision,
                         delta=delta)
                     prevrev = rev
             def deltaiscensored(delta, baserev, baselenfn):
                 """Determine if a delta represents censored revision data.

                 ``delta`` is the binary delta to inspect.

                 ``baserev`` is the base revision this delta is encoded against.

                 ``baselenfn`` is a callable receiving a revision number that resolves the
                 length of the revision fulltext.

                 Returns a bool indicating if the result of the delta represents a censored
                 revision.
                 """
                 # Fragile heuristic: unless new file meta keys are added alphabetically
                 # preceding "censored", all censored revisions are prefixed by
                 # "\1\ncensored:". A delta producing such a censored revision must be a
                 # full-replacement delta, so we inspect the first and only patch in the
                 # delta for this prefix.
                 hlen = struct.calcsize(">lll")
                 # A delta no longer than one patch header cannot carry any data.
                 if len(delta) <= hlen:
                     return False
                 oldlen = baselenfn(baserev)
                 newlen = len(delta) - hlen
                 # Anything other than a single patch replacing the whole base text
                 # is not a censoring delta.
                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                     return False
                 # Must be a bytes literal: ``delta`` is binary data, and on Python 3
                 # a bytes slice never compares equal to a native str.
                 add = b"\1\ncensored:"
                 addlen = len(add)
                 return newlen >= addlen and delta[hlen:hlen + addlen] == add