revlog: deal with nodemap deletion within the index...
marmoute - r43933:64243362 default
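This changeset touches three files: bundlerepo.py, the pure-Python index in parsers.py, and revlog.py (only unchanged context of revlog.py is shown below). The heart of the fix is in parsers.py: the index keeps a lazily built node-to-rev cache (the nodemap), and append() already keeps that cache coherent, but deleting entries from the tail of the index, as a strip does, used to leave the stripped nodes behind in the cache. The new _stripnodes() helper evicts them before the entries go away. The following is a minimal, self-contained sketch of that pattern; SketchIndex and its names are hypothetical stand-ins, not the Mercurial API, though entry[7] holds the node exactly as in the real index tuples:

class SketchIndex(object):
    """Toy model of a revlog index with a lazily cached node -> rev map."""

    def __init__(self):
        self._entries = []    # index entries; entry[7] holds the node hash
        self._nodemap = None  # node -> rev cache, built on first use

    @property
    def nodemap(self):
        if self._nodemap is None:
            self._nodemap = {e[7]: r for r, e in enumerate(self._entries)}
        return self._nodemap

    def append(self, entry):
        # keep the cache coherent on append instead of invalidating it
        if self._nodemap is not None:
            self._nodemap[entry[7]] = len(self._entries)
        self._entries.append(entry)

    def _stripnodes(self, start):
        # the fix: when revs [start:] go away, their nodes must leave the
        # cache too, or later lookups would resolve to dead revisions
        if self._nodemap is not None:
            for r in range(start, len(self._entries)):
                del self._nodemap[self._entries[r][7]]

    def delete_from(self, start):
        self._stripnodes(start)
        del self._entries[start:]

idx = SketchIndex()
idx.append((0,) * 7 + (b'node-a',))
idx.append((0,) * 7 + (b'node-b',))
assert idx.nodemap[b'node-b'] == 1
idx.delete_from(1)                    # strip rev 1
assert b'node-b' not in idx.nodemap   # the cache no longer lies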
@@ -1,670 +1,669 @@
 # bundlerepo.py - repository class for viewing uncompressed bundles
 #
 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Repository class for viewing uncompressed bundles.

 This provides a read-only repository interface to bundles as if they
 were part of the actual repository.
 """

 from __future__ import absolute_import

 import os
 import shutil

 from .i18n import _
 from .node import nullid, nullrev

 from . import (
     bundle2,
     changegroup,
     changelog,
     cmdutil,
     discovery,
     encoding,
     error,
     exchange,
     filelog,
     localrepo,
     manifest,
     mdiff,
     node as nodemod,
     pathutil,
     phases,
     pycompat,
     revlog,
     util,
     vfs as vfsmod,
 )


 class bundlerevlog(revlog.revlog):
     def __init__(self, opener, indexfile, cgunpacker, linkmapper):
         # How it works:
         # To retrieve a revision, we need to know the offset of the revision in
         # the bundle (an unbundle object). We store this offset in the index
         # (start). The base of the delta is stored in the base field.
         #
         # To differentiate a rev in the bundle from a rev in the revlog, we
         # check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
         revlog.revlog.__init__(self, opener, indexfile)
         self.bundle = cgunpacker
         n = len(self)
         self.repotiprev = n - 1
         self.bundlerevs = set()  # used by 'bundle()' revset expression
         for deltadata in cgunpacker.deltaiter():
             node, p1, p2, cs, deltabase, delta, flags = deltadata

             size = len(delta)
             start = cgunpacker.tell() - size

             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 self.bundlerevs.add(self.nodemap[node])
                 continue

             for p in (p1, p2):
                 if p not in self.nodemap:
                     raise error.LookupError(
                         p, self.indexfile, _(b"unknown parent")
                     )

             if deltabase not in self.nodemap:
                 raise LookupError(
                     deltabase, self.indexfile, _(b'unknown delta base')
                 )

             baserev = self.rev(deltabase)
             # start, size, full unc. size, base (unused), link, p1, p2, node
             e = (
                 revlog.offset_type(start, flags),
                 size,
                 -1,
                 baserev,
                 link,
                 self.rev(p1),
                 self.rev(p2),
                 node,
             )
             self.index.append(e)
-            self.nodemap[node] = n
             self.bundlerevs.add(n)
             n += 1

     def _chunk(self, rev, df=None):
         # Warning: in case of bundle, the diff is against what we stored as
         # delta base, not against rev - 1
         # XXX: could use some caching
         if rev <= self.repotiprev:
             return revlog.revlog._chunk(self, rev)
         self.bundle.seek(self.start(rev))
         return self.bundle.read(self.length(rev))

     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         if rev1 > self.repotiprev and rev2 > self.repotiprev:
             # hot path for bundle
             revb = self.index[rev2][3]
             if revb == rev1:
                 return self._chunk(rev2)
         elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
             return revlog.revlog.revdiff(self, rev1, rev2)

         return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

     def _rawtext(self, node, rev, _df=None):
         if rev is None:
             rev = self.rev(node)
         validated = False
         rawtext = None
         chain = []
         iterrev = rev
         # reconstruct the revision if it is from a changegroup
         while iterrev > self.repotiprev:
             if self._revisioncache and self._revisioncache[1] == iterrev:
                 rawtext = self._revisioncache[2]
                 break
             chain.append(iterrev)
             iterrev = self.index[iterrev][3]
         if iterrev == nullrev:
             rawtext = b''
         elif rawtext is None:
             r = super(bundlerevlog, self)._rawtext(
                 self.node(iterrev), iterrev, _df=_df
             )
             __, rawtext, validated = r
         if chain:
             validated = False
         while chain:
             delta = self._chunk(chain.pop())
             rawtext = mdiff.patches(rawtext, [delta])
         return rev, rawtext, validated

     def addrevision(self, *args, **kwargs):
         raise NotImplementedError

     def addgroup(self, *args, **kwargs):
         raise NotImplementedError

     def strip(self, *args, **kwargs):
         raise NotImplementedError

     def checksize(self):
         raise NotImplementedError


 class bundlechangelog(bundlerevlog, changelog.changelog):
     def __init__(self, opener, cgunpacker):
         changelog.changelog.__init__(self, opener)
         linkmapper = lambda x: x
         bundlerevlog.__init__(
             self, opener, self.indexfile, cgunpacker, linkmapper
         )


 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
     def __init__(
         self, opener, cgunpacker, linkmapper, dirlogstarts=None, dir=b''
     ):
         manifest.manifestrevlog.__init__(self, opener, tree=dir)
         bundlerevlog.__init__(
             self, opener, self.indexfile, cgunpacker, linkmapper
         )
         if dirlogstarts is None:
             dirlogstarts = {}
             if self.bundle.version == b"03":
                 dirlogstarts = _getfilestarts(self.bundle)
         self._dirlogstarts = dirlogstarts
         self._linkmapper = linkmapper

     def dirlog(self, d):
         if d in self._dirlogstarts:
             self.bundle.seek(self._dirlogstarts[d])
             return bundlemanifest(
                 self.opener,
                 self.bundle,
                 self._linkmapper,
                 self._dirlogstarts,
                 dir=d,
             )
         return super(bundlemanifest, self).dirlog(d)


 class bundlefilelog(filelog.filelog):
     def __init__(self, opener, path, cgunpacker, linkmapper):
         filelog.filelog.__init__(self, opener, path)
         self._revlog = bundlerevlog(
             opener, self.indexfile, cgunpacker, linkmapper
         )


 class bundlepeer(localrepo.localpeer):
     def canpush(self):
         return False


 class bundlephasecache(phases.phasecache):
     def __init__(self, *args, **kwargs):
         super(bundlephasecache, self).__init__(*args, **kwargs)
         if util.safehasattr(self, 'opener'):
             self.opener = vfsmod.readonlyvfs(self.opener)

     def write(self):
         raise NotImplementedError

     def _write(self, fp):
         raise NotImplementedError

     def _updateroots(self, phase, newroots, tr):
         self.phaseroots[phase] = newroots
         self.invalidate()
         self.dirty = True


 def _getfilestarts(cgunpacker):
     filespos = {}
     for chunkdata in iter(cgunpacker.filelogheader, {}):
         fname = chunkdata[b'filename']
         filespos[fname] = cgunpacker.tell()
         for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
             pass
     return filespos


 class bundlerepository(object):
     """A repository instance that is a union of a local repo and a bundle.

     Instances represent a read-only repository composed of a local repository
     with the contents of a bundle file applied. The repository instance is
     conceptually similar to the state of a repository after an
     ``hg unbundle`` operation. However, the contents of the bundle are never
     applied to the actual base repository.

     Instances constructed directly are not usable as repository objects.
     Use instance() or makebundlerepository() to create instances.
     """

     def __init__(self, bundlepath, url, tempparent):
         self._tempparent = tempparent
         self._url = url

         self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')

         self.tempfile = None
         f = util.posixfile(bundlepath, b"rb")
         bundle = exchange.readbundle(self.ui, f, bundlepath)

         if isinstance(bundle, bundle2.unbundle20):
             self._bundlefile = bundle
             self._cgunpacker = None

             cgpart = None
             for part in bundle.iterparts(seekable=True):
                 if part.type == b'changegroup':
                     if cgpart:
                         raise NotImplementedError(
                             b"can't process multiple changegroups"
                         )
                     cgpart = part

                 self._handlebundle2part(bundle, part)

             if not cgpart:
                 raise error.Abort(_(b"No changegroups found"))

             # This is required to placate a later consumer, which expects
             # the payload offset to be at the beginning of the changegroup.
             # We need to do this after the iterparts() generator advances
             # because iterparts() will seek to end of payload after the
             # generator returns control to iterparts().
             cgpart.seek(0, os.SEEK_SET)

         elif isinstance(bundle, changegroup.cg1unpacker):
             if bundle.compressed():
                 f = self._writetempbundle(
                     bundle.read, b'.hg10un', header=b'HG10UN'
                 )
                 bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)

             self._bundlefile = bundle
             self._cgunpacker = bundle
         else:
             raise error.Abort(
                 _(b'bundle type %s cannot be read') % type(bundle)
             )

         # dict with the mapping 'filename' -> position in the changegroup.
         self._cgfilespos = {}

         self.firstnewrev = self.changelog.repotiprev + 1
         phases.retractboundary(
             self,
             None,
             phases.draft,
             [ctx.node() for ctx in self[self.firstnewrev :]],
         )

     def _handlebundle2part(self, bundle, part):
         if part.type != b'changegroup':
             return

         cgstream = part
         version = part.params.get(b'version', b'01')
         legalcgvers = changegroup.supportedincomingversions(self)
         if version not in legalcgvers:
             msg = _(b'Unsupported changegroup version: %s')
             raise error.Abort(msg % version)
         if bundle.compressed():
             cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)

         self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')

     def _writetempbundle(self, readfn, suffix, header=b''):
         """Write a temporary file to disk
         """
         fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
         self.tempfile = temp

         with os.fdopen(fdtemp, 'wb') as fptemp:
             fptemp.write(header)
             while True:
                 chunk = readfn(2 ** 18)
                 if not chunk:
                     break
                 fptemp.write(chunk)

         return self.vfs.open(self.tempfile, mode=b"rb")

     @localrepo.unfilteredpropertycache
     def _phasecache(self):
         return bundlephasecache(self, self._phasedefaults)

     @localrepo.unfilteredpropertycache
     def changelog(self):
         # consume the header if it exists
         self._cgunpacker.changelogheader()
         c = bundlechangelog(self.svfs, self._cgunpacker)
         self.manstart = self._cgunpacker.tell()
         return c

     def _refreshchangelog(self):
         # changelog for bundle repo are not filecache, this method is not
         # applicable.
         pass

     @localrepo.unfilteredpropertycache
     def manifestlog(self):
         self._cgunpacker.seek(self.manstart)
         # consume the header if it exists
         self._cgunpacker.manifestheader()
         linkmapper = self.unfiltered().changelog.rev
         rootstore = bundlemanifest(self.svfs, self._cgunpacker, linkmapper)
         self.filestart = self._cgunpacker.tell()

         return manifest.manifestlog(
             self.svfs, self, rootstore, self.narrowmatch()
         )

     def _consumemanifest(self):
         """Consumes the manifest portion of the bundle, setting filestart so the
         file portion can be read."""
         self._cgunpacker.seek(self.manstart)
         self._cgunpacker.manifestheader()
         for delta in self._cgunpacker.deltaiter():
             pass
         self.filestart = self._cgunpacker.tell()

     @localrepo.unfilteredpropertycache
     def manstart(self):
         self.changelog
         return self.manstart

     @localrepo.unfilteredpropertycache
     def filestart(self):
         self.manifestlog

         # If filestart was not set by self.manifestlog, that means the
         # manifestlog implementation did not consume the manifests from the
         # changegroup (ex: it might be consuming trees from a separate bundle2
         # part instead). So we need to manually consume it.
         if 'filestart' not in self.__dict__:
             self._consumemanifest()

         return self.filestart

     def url(self):
         return self._url

     def file(self, f):
         if not self._cgfilespos:
             self._cgunpacker.seek(self.filestart)
             self._cgfilespos = _getfilestarts(self._cgunpacker)

         if f in self._cgfilespos:
             self._cgunpacker.seek(self._cgfilespos[f])
             linkmapper = self.unfiltered().changelog.rev
             return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
         else:
             return super(bundlerepository, self).file(f)

     def close(self):
         """Close assigned bundle file immediately."""
         self._bundlefile.close()
         if self.tempfile is not None:
             self.vfs.unlink(self.tempfile)
         if self._tempparent:
             shutil.rmtree(self._tempparent, True)

     def cancopy(self):
         return False

     def peer(self):
         return bundlepeer(self)

     def getcwd(self):
         return encoding.getcwd()  # always outside the repo

     # Check if parents exist in localrepo before setting
     def setparents(self, p1, p2=nullid):
         p1rev = self.changelog.rev(p1)
         p2rev = self.changelog.rev(p2)
         msg = _(b"setting parent to node %s that only exists in the bundle\n")
         if self.changelog.repotiprev < p1rev:
             self.ui.warn(msg % nodemod.hex(p1))
         if self.changelog.repotiprev < p2rev:
             self.ui.warn(msg % nodemod.hex(p2))
         return super(bundlerepository, self).setparents(p1, p2)


 def instance(ui, path, create, intents=None, createopts=None):
     if create:
         raise error.Abort(_(b'cannot create new bundle repository'))
     # internal config: bundle.mainreporoot
     parentpath = ui.config(b"bundle", b"mainreporoot")
     if not parentpath:
         # try to find the correct path to the working directory repo
         parentpath = cmdutil.findrepo(encoding.getcwd())
         if parentpath is None:
             parentpath = b''
     if parentpath:
         # Try to make the full path relative so we get a nice, short URL.
         # In particular, we don't want temp dir names in test outputs.
         cwd = encoding.getcwd()
         if parentpath == cwd:
             parentpath = b''
         else:
             cwd = pathutil.normasprefix(cwd)
             if parentpath.startswith(cwd):
                 parentpath = parentpath[len(cwd) :]
     u = util.url(path)
     path = u.localpath()
     if u.scheme == b'bundle':
         s = path.split(b"+", 1)
         if len(s) == 1:
             repopath, bundlename = parentpath, s[0]
         else:
             repopath, bundlename = s
     else:
         repopath, bundlename = parentpath, path

     return makebundlerepository(ui, repopath, bundlename)


 def makebundlerepository(ui, repopath, bundlepath):
     """Make a bundle repository object based on repo and bundle paths."""
     if repopath:
         url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
     else:
         url = b'bundle:%s' % bundlepath

     # Because we can't make any guarantees about the type of the base
     # repository, we can't have a static class representing the bundle
     # repository. We also can't make any guarantees about how to even
     # call the base repository's constructor!
     #
     # So, our strategy is to go through ``localrepo.instance()`` to construct
     # a repo instance. Then, we dynamically create a new type derived from
     # both it and our ``bundlerepository`` class which overrides some
     # functionality. We then change the type of the constructed repository
     # to this new type and initialize the bundle-specific bits of it.

     try:
         repo = localrepo.instance(ui, repopath, create=False)
         tempparent = None
     except error.RepoError:
         tempparent = pycompat.mkdtemp()
         try:
             repo = localrepo.instance(ui, tempparent, create=True)
         except Exception:
             shutil.rmtree(tempparent)
             raise

     class derivedbundlerepository(bundlerepository, repo.__class__):
         pass

     repo.__class__ = derivedbundlerepository
     bundlerepository.__init__(repo, bundlepath, url, tempparent)

     return repo


 class bundletransactionmanager(object):
     def transaction(self):
         return None

     def close(self):
         raise NotImplementedError

     def release(self):
         raise NotImplementedError


 def getremotechanges(
     ui, repo, peer, onlyheads=None, bundlename=None, force=False
 ):
     '''obtains a bundle of changes incoming from peer

     "onlyheads" restricts the returned changes to those reachable from the
     specified heads.
     "bundlename", if given, stores the bundle to this file path permanently;
     otherwise it's stored to a temp file and gets deleted again when you call
     the returned "cleanupfn".
     "force" indicates whether to proceed on unrelated repos.

     Returns a tuple (local, csets, cleanupfn):

     "local" is a local repo from which to obtain the actual incoming
     changesets; it is a bundlerepo for the obtained bundle when the
     original "peer" is remote.
     "csets" lists the incoming changeset node ids.
     "cleanupfn" must be called without arguments when you're done processing
     the changes; it closes both the original "peer" and the one returned
     here.
     '''
     tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
     common, incoming, rheads = tmp
     if not incoming:
         try:
             if bundlename:
                 os.unlink(bundlename)
         except OSError:
             pass
         return repo, [], peer.close

     commonset = set(common)
     rheads = [x for x in rheads if x not in commonset]

     bundle = None
     bundlerepo = None
     localrepo = peer.local()
     if bundlename or not localrepo:
         # create a bundle (uncompressed if peer repo is not local)

         # developer config: devel.legacy.exchange
         legexc = ui.configlist(b'devel', b'legacy.exchange')
         forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
         canbundle2 = (
             not forcebundle1
             and peer.capable(b'getbundle')
             and peer.capable(b'bundle2')
         )
         if canbundle2:
             with peer.commandexecutor() as e:
                 b2 = e.callcommand(
                     b'getbundle',
                     {
                         b'source': b'incoming',
                         b'common': common,
                         b'heads': rheads,
                         b'bundlecaps': exchange.caps20to10(
                             repo, role=b'client'
                         ),
                         b'cg': True,
                     },
                 ).result()

                 fname = bundle = changegroup.writechunks(
                     ui, b2._forwardchunks(), bundlename
                 )
         else:
             if peer.capable(b'getbundle'):
                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'getbundle',
                         {
                             b'source': b'incoming',
                             b'common': common,
                             b'heads': rheads,
                         },
                     ).result()
             elif onlyheads is None and not peer.capable(b'changegroupsubset'):
                 # compat with older servers when pulling all remote heads

                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'changegroup',
                         {b'nodes': incoming, b'source': b'incoming',},
                     ).result()

                 rheads = None
             else:
                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'changegroupsubset',
                         {
                             b'bases': incoming,
                             b'heads': rheads,
                             b'source': b'incoming',
                         },
                     ).result()

             if localrepo:
                 bundletype = b"HG10BZ"
             else:
                 bundletype = b"HG10UN"
             fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
         # keep written bundle?
         if bundlename:
             bundle = None
         if not localrepo:
             # use the created uncompressed bundlerepo
             localrepo = bundlerepo = makebundlerepository(
                 repo.baseui, repo.root, fname
             )

             # this repo contains local and peer now, so filter out local again
             common = repo.heads()
     if localrepo:
         # Part of common may be remotely filtered
         # So use an unfiltered version
         # The discovery process probably need cleanup to avoid that
         localrepo = localrepo.unfiltered()

     csets = localrepo.changelog.findmissing(common, rheads)

     if bundlerepo:
         reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]

         with peer.commandexecutor() as e:
             remotephases = e.callcommand(
                 b'listkeys', {b'namespace': b'phases',}
             ).result()

         pullop = exchange.pulloperation(bundlerepo, peer, heads=reponodes)
         pullop.trmanager = bundletransactionmanager()
         exchange._pullapplyphases(pullop, remotephases)

     def cleanup():
         if bundlerepo:
             bundlerepo.close()
         if bundle:
             os.unlink(bundle)
         peer.close()

     return (localrepo, csets, cleanup)
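The only change to bundlerepo.py above is dropping the manual `self.nodemap[node] = n` after `self.index.append(e)`. As the parsers.py hunk below shows, BaseIndexObject.append() already records every appended node in the cached nodemap, so the hand-maintained assignment appears redundant once the index owns the mapping on both append and delete. Roughly, with illustrative names:

# before: the caller kept the node -> rev cache in sync by hand
self.index.append(e)
self.nodemap[node] = n  # duplicates what index.append() already does

# after: the index alone maintains the mapping, on append (append())
# and now on deletion as well (_stripnodes() via __delitem__)
self.index.append(e)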
@@ -1,205 +1,213 @@
 # parsers.py - Python implementation of parsers.c
 #
 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import struct
 import zlib

 from ..node import nullid, nullrev
 from .. import (
     pycompat,
     revlogutils,
     util,
 )

 stringio = pycompat.bytesio


 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress

 # Some code below makes tuples directly because it's more convenient. However,
 # code outside this module should always use dirstatetuple.
 def dirstatetuple(*x):
     # x is a tuple
     return x


 indexformatng = b">Qiiiiii20s12x"
 indexfirst = struct.calcsize(b'Q')
 sizeint = struct.calcsize(b'i')
 indexsize = struct.calcsize(indexformatng)


 def gettype(q):
     return int(q & 0xFFFF)


 def offset_type(offset, type):
     return int(int(offset) << 16 | type)


 class BaseIndexObject(object):
     @util.propertycache
     def nodemap(self):
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         for r in range(0, len(self)):
             n = self[r][7]
             nodemap[n] = r
         return nodemap

+    def _stripnodes(self, start):
+        if 'nodemap' in vars(self):
+            for r in range(start, len(self)):
+                n = self[r][7]
+                del self.nodemap[n]
+
     def clearcaches(self):
         self.__dict__.pop('nodemap', None)

     def __len__(self):
         return self._lgt + len(self._extra)

     def append(self, tup):
         if 'nodemap' in vars(self):
             self.nodemap[tup[7]] = len(self)
         self._extra.append(tup)

     def _check_index(self, i):
         if not isinstance(i, int):
             raise TypeError(b"expecting int indexes")
         if i < 0 or i >= len(self):
             raise IndexError

     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         self._check_index(i)
         if i >= self._lgt:
             return self._extra[i - self._lgt]
         index = self._calculate_index(i)
         r = struct.unpack(indexformatng, self._data[index : index + indexsize])
         if i == 0:
             e = list(r)
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             return tuple(e)
         return r


 class IndexObject(BaseIndexObject):
     def __init__(self, data):
         assert len(data) % indexsize == 0
         self._data = data
         self._lgt = len(data) // indexsize
         self._extra = []

     def _calculate_index(self, i):
         return i * indexsize

     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         i = i.start
         self._check_index(i)
+        self._stripnodes(i)
         if i < self._lgt:
             self._data = self._data[: i * indexsize]
             self._lgt = i
             self._extra = []
         else:
             self._extra = self._extra[: i - self._lgt]


 class InlinedIndexObject(BaseIndexObject):
     def __init__(self, data, inline=0):
         self._data = data
         self._lgt = self._inline_scan(None)
         self._inline_scan(self._lgt)
         self._extra = []

     def _inline_scan(self, lgt):
         off = 0
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
         while off <= len(self._data) - indexsize:
             (s,) = struct.unpack(
                 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
             )
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
             off += indexsize + s
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count

     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         i = i.start
         self._check_index(i)
+        self._stripnodes(i)
         if i < self._lgt:
             self._offsets = self._offsets[:i]
             self._lgt = i
             self._extra = []
         else:
             self._extra = self._extra[: i - self._lgt]

     def _calculate_index(self, i):
         return self._offsets[i]


 def parse_index2(data, inline):
     if not inline:
         return IndexObject(data), None
     return InlinedIndexObject(data, inline), (0, data)


 def parse_dirstate(dmap, copymap, st):
     parents = [st[:20], st[20:40]]
     # dereference fields so they will be local in loop
     format = b">cllll"
     e_size = struct.calcsize(format)
     pos1 = 40
     l = len(st)

     # the inner loop
     while pos1 < l:
         pos2 = pos1 + e_size
         e = _unpack(b">cllll", st[pos1:pos2])  # a literal here is faster
         pos1 = pos2 + e[4]
         f = st[pos2:pos1]
         if b'\0' in f:
             f, c = f.split(b'\0')
             copymap[f] = c
         dmap[f] = e[:4]
     return parents


 def pack_dirstate(dmap, copymap, pl, now):
     now = int(now)
     cs = stringio()
     write = cs.write
     write(b"".join(pl))
     for f, e in pycompat.iteritems(dmap):
         if e[0] == b'n' and e[3] == now:
             # The file was last modified "simultaneously" with the current
             # write to dirstate (i.e. within the same second for file-
             # systems with a granularity of 1 sec). This commonly happens
             # for at least a couple of files on 'update'.
             # The user could change the file without changing its size
             # within the same second. Invalidate the file's mtime in
             # dirstate, forcing future 'status' calls to compare the
             # contents of the file if the size is the same. This prevents
             # mistakenly treating such files as clean.
             e = dirstatetuple(e[0], e[1], e[2], -1)
             dmap[f] = e

         if f in copymap:
             f = b"%s\0%s" % (f, copymap[f])
         e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
         write(e)
         write(f)
     return cs.getvalue()
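A quick usage sketch of the new deletion path, assuming this revision of mercurial.pure.parsers is importable; the node value below is fabricated for illustration:

from mercurial.pure import parsers

index, _cache = parsers.parse_index2(b'', 0)  # empty, non-inline index
fakenode = b'\x01' * 20                       # fabricated 20-byte node
index.append((0, 0, 0, -1, -1, -1, -1, fakenode))
assert index.nodemap[fakenode] == 0   # cache built lazily, then kept in sync
del index[0:-1]                       # strip everything from rev 0 onward
assert fakenode not in index.nodemap  # _stripnodes() evicted the node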
@@ -1,2955 +1,2960 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 revlogutils,
68 revlogutils,
69 templatefilters,
69 templatefilters,
70 util,
70 util,
71 )
71 )
72 from .interfaces import (
72 from .interfaces import (
73 repository,
73 repository,
74 util as interfaceutil,
74 util as interfaceutil,
75 )
75 )
76 from .revlogutils import (
76 from .revlogutils import (
77 deltas as deltautil,
77 deltas as deltautil,
78 flagutil,
78 flagutil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

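# Illustrative note (a sketch inferred from the ellipsis processors above,
# not a statement of this module's API): a flag processor is a
# (read, write, raw) triple; read returns (text, validatehash, sidedata),
# write returns (text, validatehash), and raw returns whether the stored
# rawtext can be hash-checked as-is.
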
def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


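# Illustrative sketch (hypothetical helper, not part of the module):
# offset_type() packs a 48-bit data offset and 16 bits of storage flags
# into the single 64-bit first field of an index entry, and
# getoffset()/gettype() unpack it again.
def _example_offset_packing():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED

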
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


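# Illustrative sketch (hypothetical helper, not part of the module): a v0
# entry is four big-endian 32-bit ints followed by three 20-byte nodeids,
# 76 bytes in all, and round-trips through the struct above.
def _example_v0_roundtrip():
    entry = (0, 12, 0, 0, b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
    raw = indexformatv0_pack(*entry)
    assert len(raw) == indexformatv0.size == 76
    assert indexformatv0_unpack(raw) == entry

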
class revlogoldindex(list):
    @util.propertycache
    def nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def append(self, tup):
        self.nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        # only the trailing ``a:-1`` slice form used by strip is
        # supported; drop the deleted revisions from the nodemap before
        # the index entries themselves go away
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self.nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

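    # Illustrative usage (sketch): callers strip trailing revisions with
    # ``del index[rev:-1]``; the loop above removes the stripped nodes
    # from the nodemap before the list entries are deleted, keeping the
    # two in sync.
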
    def clearcaches(self):
        self.__dict__.pop('nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

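# Illustrative sketch (hypothetical helper, not part of the module): a v1
# ("ng") entry packs the offset/flags word from offset_type() with the
# lengths, base/link/parent revs and a 20-byte nodeid padded to 32 bytes,
# 64 bytes per entry.
def _example_ng_entry():
    entry = (offset_type(0, 0), 11, 24, 0, 0, -1, -1, b'\x11' * 20)
    assert len(indexformatng_pack(*entry)) == indexformatng.size == 64
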
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            # rev 0 always has offset 0, so the first four bytes of its
            # on-disk entry can be reused as the revlog version header
            p = versionformat_pack(version) + p[4:]
        return p


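# Illustrative sketch (hypothetical helper, not part of the module): the
# version header stamped by packentry() for rev 0 is recoverable from the
# first four bytes of the index data.
def _example_version_stamp():
    io = revlogio()
    entry = (offset_type(0, 0), 11, 24, 0, 0, -1, -1, b'\x11' * 20)
    raw = io.packentry(entry, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
    assert versionformat_unpack(raw[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA

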
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        self.nodemap = self.index.nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self.index.nodemap

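    # Illustrative note (sketch): ``rl.index.nodemap[node]`` maps a
    # 20-byte nodeid back to its revision number; rev() below wraps the
    # lookup and translates failures into LookupError/WdirUnsupported.
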
    @property
    def _nodecache(self):
        msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self.index.clearcaches()

    def rev(self, node):
        try:
            return self.index.nodemap[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
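    # For instance (illustrative): if entry[0] == offset_type(1024, 0),
    # start() below returns 1024 and flags() returns 0.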
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

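    # Worked example (illustrative): with generaldelta off, a rev 5 whose
    # chain base is 3 yields ([3, 4, 5], False); with stoprev=4 the walk
    # stops before including rev 4 and yields ([5], True).
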
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
1112 n = self.node(r)
1106 isdescendant = False
1113 isdescendant = False
1107 if lowestrev == nullrev: # Everybody is a descendant of nullid
1114 if lowestrev == nullrev: # Everybody is a descendant of nullid
1108 isdescendant = True
1115 isdescendant = True
1109 elif n in descendants:
1116 elif n in descendants:
1110 # n is already a descendant
1117 # n is already a descendant
1111 isdescendant = True
1118 isdescendant = True
1112 # This check only needs to be done here because all the roots
1119 # This check only needs to be done here because all the roots
1113 # will start being marked is descendants before the loop.
1120 # will start being marked is descendants before the loop.
1114 if n in roots:
1121 if n in roots:
1115 # If n was a root, check if it's a 'real' root.
1122 # If n was a root, check if it's a 'real' root.
1116 p = tuple(self.parents(n))
1123 p = tuple(self.parents(n))
1117 # If any of its parents are descendants, it's not a root.
1124 # If any of its parents are descendants, it's not a root.
1118 if (p[0] in descendants) or (p[1] in descendants):
1125 if (p[0] in descendants) or (p[1] in descendants):
1119 roots.remove(n)
1126 roots.remove(n)
1120 else:
1127 else:
1121 p = tuple(self.parents(n))
1128 p = tuple(self.parents(n))
1122 # A node is a descendant if either of its parents are
1129 # A node is a descendant if either of its parents are
1123 # descendants. (We seeded the dependents list with the roots
1130 # descendants. (We seeded the dependents list with the roots
1124 # up there, remember?)
1131 # up there, remember?)
1125 if (p[0] in descendants) or (p[1] in descendants):
1132 if (p[0] in descendants) or (p[1] in descendants):
1126 descendants.add(n)
1133 descendants.add(n)
1127 isdescendant = True
1134 isdescendant = True
1128 if isdescendant and ((ancestors is None) or (n in ancestors)):
1135 if isdescendant and ((ancestors is None) or (n in ancestors)):
1129 # Only include nodes that are both descendants and ancestors.
1136 # Only include nodes that are both descendants and ancestors.
1130 orderedout.append(n)
1137 orderedout.append(n)
1131 if (ancestors is not None) and (n in heads):
1138 if (ancestors is not None) and (n in heads):
1132 # We're trying to figure out which heads are reachable
1139 # We're trying to figure out which heads are reachable
1133 # from roots.
1140 # from roots.
1134 # Mark this head as having been reached
1141 # Mark this head as having been reached
1135 heads[n] = True
1142 heads[n] = True
1136 elif ancestors is None:
1143 elif ancestors is None:
1137 # Otherwise, we're trying to discover the heads.
1144 # Otherwise, we're trying to discover the heads.
1138 # Assume this is a head because if it isn't, the next step
1145 # Assume this is a head because if it isn't, the next step
1139 # will eventually remove it.
1146 # will eventually remove it.
1140 heads[n] = True
1147 heads[n] = True
1141 # But, obviously its parents aren't.
1148 # But, obviously its parents aren't.
1142 for p in self.parents(n):
1149 for p in self.parents(n):
1143 heads.pop(p, None)
1150 heads.pop(p, None)
1144 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1151 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1145 roots = list(roots)
1152 roots = list(roots)
1146 assert orderedout
1153 assert orderedout
1147 assert roots
1154 assert roots
1148 assert heads
1155 assert heads
1149 return (orderedout, roots, heads)
1156 return (orderedout, roots, heads)
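
    # Illustrative sketch (hypothetical caller, not used by this module):
    # nodesbetween is typically driven with binary node IDs, e.g. to
    # enumerate everything between a known base and the current heads:
    #
    #   nodes, roots, heads = rl.nodesbetween(roots=[base], heads=None)
    #
    # 'nodes' comes back topologically sorted; 'roots' and 'heads' are the
    # subsets of the inputs that actually bound the returned range.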

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
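
    # A minimal standalone sketch of the marking pass above (hypothetical
    # helper, not part of the class): every rev starts as a candidate head
    # and is struck out as soon as it is seen as somebody's parent.
    #
    #   def _headrevs_sketch(parentrevs, count):
    #       ishead = [True] * count
    #       for r in range(count):
    #           for p in parentrevs(r):
    #               if p >= 0:  # skip nullrev parents
    #                   ishead[p] = False
    #       return [r for r in range(count) if ishead[r]]
    #
    # The real loop above instead sizes the list count + 1 so that a
    # nullrev (-1) parent harmlessly clears the extra trailing slot.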

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )
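
    # Sketch of how the two entry points above compose (hypothetical revs,
    # not part of the module): isancestorrev(a, b) reduces to asking
    # whether root 'a' can reach head 'b':
    #
    #   rl.reachableroots(a, heads=[b], roots=[a], includepath=False)
    #
    # returns a non-empty list exactly when a path from b back to a exists
    # by following parent pointers, i.e. when a is an ancestor of b.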

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))
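
    # Illustrative inputs (hypothetical values, not part of the module):
    # lookup accepts several spellings of the same revision, e.g.
    #
    #   rl.lookup(0)              # revision number
    #   rl.lookup(b'0')           # str(revision number)
    #   rl.lookup(node)           # 20-byte binary node
    #   rl.lookup(hex(node))      # 40-char hex node
    #   rl.lookup(hex(node)[:6])  # unambiguous hex prefix
    #
    # and raises LookupError when nothing matches.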

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

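    # Example of the round trip (hypothetical node, not part of the module):
    # shortest and _partialmatch are inverses for unambiguous prefixes:
    #
    #   prefix = rl.shortest(node)  # e.g. b'1f2a'
    #   assert rl._partialmatch(prefix) == node
    #
    # The wdir handling above exists because an all-'f' prefix could also
    # name the virtual working-directory revision.
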
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
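
    # Worked example of the window arithmetic above (values are made up):
    # with cachesize = 65536 (a power of two), offset = 70000, length = 100:
    #
    #   realoffset = 70000 & ~65535                   -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536
    #              = 131072 - 65536                   -> 65536
    #
    # so the read is widened to a full cache-aligned window around the
    # requested bytes; the bitmask trick only works because the cache size
    # is a power of two.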

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
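
    # Worked example for the inline adjustment above (made-up sizes): in an
    # inline revlog the index entries and data chunks are interleaved, so
    # rev r's data sits after r + 1 index entries. With an entry size of
    # 64 bytes and start(2) == 300:
    #
    #   physical start of rev 2 = 300 + (2 + 1) * 64 = 492
    #
    # A non-inline revlog keeps chunks in a separate data file, so no
    # adjustment is needed.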

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
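
    # Intuition for the walk above (hypothetical chain): a revision is a
    # snapshot when its delta chain does not pass through either of its
    # parents. In a sparse revlog, base == p1 or base == p2 means an
    # ordinary delta, while a base that is itself a snapshot makes this an
    # intermediate snapshot, e.g.:
    #
    #   full snapshot (base == nullrev)
    #     -> intermediate snapshot (base is the snapshot above)
    #        -> plain delta (base == p1)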

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
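
    # Shape of the reconstruction above (hypothetical chain): for a delta
    # chain [b, d1, d2] ending at rev, the raw text is rebuilt by patching
    # the base with each delta in order, roughly:
    #
    #   chunks = self._chunks([b, d1, d2])
    #   rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
    #
    # When the chain walk stopped at a cached revision, the cached rawtext
    # stands in for the base and only the remaining deltas are applied.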

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
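
    # Sketch of the convention implemented by hashrevisionsha1 (an
    # assumption about storageutil, stated here for illustration only):
    # the node is the SHA-1 of the two parent nodes, sorted, followed by
    # the revision text:
    #
    #   import hashlib
    #   def node_sketch(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
    #
    # Sorting the parents makes the hash independent of parent order.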
1833
1840
1834 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1841 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1835 """Check node hash integrity.
1842 """Check node hash integrity.
1836
1843
1837 Available as a function so that subclasses can extend hash mismatch
1844 Available as a function so that subclasses can extend hash mismatch
1838 behaviors as needed.
1845 behaviors as needed.
1839 """
1846 """
1840 try:
1847 try:
1841 if p1 is None and p2 is None:
1848 if p1 is None and p2 is None:
1842 p1, p2 = self.parents(node)
1849 p1, p2 = self.parents(node)
1843 if node != self.hash(text, p1, p2):
1850 if node != self.hash(text, p1, p2):
1844 # Clear the revision cache on hash failure. The revision cache
1851 # Clear the revision cache on hash failure. The revision cache
1845 # only stores the raw revision and clearing the cache does have
1852 # only stores the raw revision and clearing the cache does have
1846 # the side-effect that we won't have a cache hit when the raw
1853 # the side-effect that we won't have a cache hit when the raw
1847 # revision data is accessed. But this case should be rare and
1854 # revision data is accessed. But this case should be rare and
1848 # it is extra work to teach the cache about the hash
1855 # it is extra work to teach the cache about the hash
1849 # verification state.
1856 # verification state.
1850 if self._revisioncache and self._revisioncache[0] == node:
1857 if self._revisioncache and self._revisioncache[0] == node:
1851 self._revisioncache = None
1858 self._revisioncache = None
1852
1859
1853 revornode = rev
1860 revornode = rev
1854 if revornode is None:
1861 if revornode is None:
1855 revornode = templatefilters.short(hex(node))
1862 revornode = templatefilters.short(hex(node))
1856 raise error.RevlogError(
1863 raise error.RevlogError(
1857 _(b"integrity check failed on %s:%s")
1864 _(b"integrity check failed on %s:%s")
1858 % (self.indexfile, pycompat.bytestr(revornode))
1865 % (self.indexfile, pycompat.bytestr(revornode))
1859 )
1866 )
1860 except error.RevlogError:
1867 except error.RevlogError:
1861 if self._censorable and storageutil.iscensoredtext(text):
1868 if self._censorable and storageutil.iscensoredtext(text):
1862 raise error.CensoredNodeError(self.indexfile, node, text)
1869 raise error.CensoredNodeError(self.indexfile, node, text)
1863 raise
1870 raise
1864
1871
1865 def _enforceinlinesize(self, tr, fp=None):
1872 def _enforceinlinesize(self, tr, fp=None):
1866 """Check if the revlog is too big for inline and convert if so.
1873 """Check if the revlog is too big for inline and convert if so.
1867
1874
1868 This should be called after revisions are added to the revlog. If the
1875 This should be called after revisions are added to the revlog. If the
1869 revlog has grown too large to be an inline revlog, it will convert it
1876 revlog has grown too large to be an inline revlog, it will convert it
1870 to use multiple index and data files.
1877 to use multiple index and data files.
1871 """
1878 """
1872 tiprev = len(self) - 1
1879 tiprev = len(self) - 1
1873 if (
1880 if (
1874 not self._inline
1881 not self._inline
1875 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1882 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1876 ):
1883 ):
1877 return
1884 return
1878
1885
1879 trinfo = tr.find(self.indexfile)
1886 trinfo = tr.find(self.indexfile)
1880 if trinfo is None:
1887 if trinfo is None:
1881 raise error.RevlogError(
1888 raise error.RevlogError(
1882 _(b"%s not found in the transaction") % self.indexfile
1889 _(b"%s not found in the transaction") % self.indexfile
1883 )
1890 )
1884
1891
1885 trindex = trinfo[2]
1892 trindex = trinfo[2]
1886 if trindex is not None:
1893 if trindex is not None:
1887 dataoff = self.start(trindex)
1894 dataoff = self.start(trindex)
1888 else:
1895 else:
1889 # revlog was stripped at start of transaction, use all leftover data
1896 # revlog was stripped at start of transaction, use all leftover data
1890 trindex = len(self) - 1
1897 trindex = len(self) - 1
1891 dataoff = self.end(tiprev)
1898 dataoff = self.end(tiprev)
1892
1899
1893 tr.add(self.datafile, dataoff)
1900 tr.add(self.datafile, dataoff)
1894
1901
1895 if fp:
1902 if fp:
1896 fp.flush()
1903 fp.flush()
1897 fp.close()
1904 fp.close()
1898 # We can't use the cached file handle after close(). So prevent
1905 # We can't use the cached file handle after close(). So prevent
1899 # its usage.
1906 # its usage.
1900 self._writinghandles = None
1907 self._writinghandles = None
1901
1908
1902 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1909 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1903 for r in self:
1910 for r in self:
1904 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1911 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1905
1912
1906 with self._indexfp(b'w') as fp:
1913 with self._indexfp(b'w') as fp:
1907 self.version &= ~FLAG_INLINE_DATA
1914 self.version &= ~FLAG_INLINE_DATA
1908 self._inline = False
1915 self._inline = False
1909 io = self._io
1916 io = self._io
1910 for i in self:
1917 for i in self:
1911 e = io.packentry(self.index[i], self.node, self.version, i)
1918 e = io.packentry(self.index[i], self.node, self.version, i)
1912 fp.write(e)
1919 fp.write(e)
1913
1920
1914 # the temp file replace the real index when we exit the context
1921 # the temp file replace the real index when we exit the context
1915 # manager
1922 # manager
1916
1923
1917 tr.replace(self.indexfile, trindex * self._io.size)
1924 tr.replace(self.indexfile, trindex * self._io.size)
1918 self._chunkclear()
1925 self._chunkclear()
1919
1926
1920 def _nodeduplicatecallback(self, transaction, node):
1927 def _nodeduplicatecallback(self, transaction, node):
1921 """called when trying to add a node already stored.
1928 """called when trying to add a node already stored.
1922 """
1929 """
1923
1930
1924 def addrevision(
1931 def addrevision(
1925 self,
1932 self,
1926 text,
1933 text,
1927 transaction,
1934 transaction,
1928 link,
1935 link,
1929 p1,
1936 p1,
1930 p2,
1937 p2,
1931 cachedelta=None,
1938 cachedelta=None,
1932 node=None,
1939 node=None,
1933 flags=REVIDX_DEFAULT_FLAGS,
1940 flags=REVIDX_DEFAULT_FLAGS,
1934 deltacomputer=None,
1941 deltacomputer=None,
1935 sidedata=None,
1942 sidedata=None,
1936 ):
1943 ):
1937 """add a revision to the log
1944 """add a revision to the log
1938
1945
1939 text - the revision data to add
1946 text - the revision data to add
1940 transaction - the transaction object used for rollback
1947 transaction - the transaction object used for rollback
1941 link - the linkrev data to add
1948 link - the linkrev data to add
1942 p1, p2 - the parent nodeids of the revision
1949 p1, p2 - the parent nodeids of the revision
1943 cachedelta - an optional precomputed delta
1950 cachedelta - an optional precomputed delta
1944 node - nodeid of revision; typically node is not specified, and it is
1951 node - nodeid of revision; typically node is not specified, and it is
1945 computed by default as hash(text, p1, p2); however, subclasses might
1952 computed by default as hash(text, p1, p2); however, subclasses might
1946 use a different hashing method (and override checkhash() in that case)
1953 use a different hashing method (and override checkhash() in that case)
1947 flags - the known flags to set on the revision
1954 flags - the known flags to set on the revision
1948 deltacomputer - an optional deltacomputer instance shared between
1955 deltacomputer - an optional deltacomputer instance shared between
1949 multiple calls
1956 multiple calls
1950 """
1957 """
1951 if link == nullrev:
1958 if link == nullrev:
1952 raise error.RevlogError(
1959 raise error.RevlogError(
1953 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1960 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1954 )
1961 )
1955
1962
1956 if sidedata is None:
1963 if sidedata is None:
1957 sidedata = {}
1964 sidedata = {}
1958 flags = flags & ~REVIDX_SIDEDATA
1965 flags = flags & ~REVIDX_SIDEDATA
1959 elif not self.hassidedata:
1966 elif not self.hassidedata:
1960 raise error.ProgrammingError(
1967 raise error.ProgrammingError(
1961 _(b"trying to add sidedata to a revlog that does not support them")
1968 _(b"trying to add sidedata to a revlog that does not support them")
1962 )
1969 )
1963 else:
1970 else:
1964 flags |= REVIDX_SIDEDATA
1971 flags |= REVIDX_SIDEDATA
1965
1972
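# The branch above normalizes the sidedata flag: when no sidedata is
# passed, REVIDX_SIDEDATA is cleared; when a mapping is passed, the
# revlog must advertise sidedata support, and the flag is forced on so
# readers know to look for the extra data.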
1966 if flags:
1973 if flags:
1967 node = node or self.hash(text, p1, p2)
1974 node = node or self.hash(text, p1, p2)
1968
1975
1969 rawtext, validatehash = flagutil.processflagswrite(
1976 rawtext, validatehash = flagutil.processflagswrite(
1970 self, text, flags, sidedata=sidedata
1977 self, text, flags, sidedata=sidedata
1971 )
1978 )
1972
1979
1973 # If the flag processor modifies the revision data, ignore any provided
1980 # If the flag processor modifies the revision data, ignore any provided
1974 # cachedelta.
1981 # cachedelta.
1975 if rawtext != text:
1982 if rawtext != text:
1976 cachedelta = None
1983 cachedelta = None
1977
1984
1978 if len(rawtext) > _maxentrysize:
1985 if len(rawtext) > _maxentrysize:
1979 raise error.RevlogError(
1986 raise error.RevlogError(
1980 _(
1987 _(
1981 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1988 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1982 )
1989 )
1983 % (self.indexfile, len(rawtext))
1990 % (self.indexfile, len(rawtext))
1984 )
1991 )
1985
1992
1986 node = node or self.hash(rawtext, p1, p2)
1993 node = node or self.hash(rawtext, p1, p2)
1987 if node in self.nodemap:
1994 if node in self.nodemap:
1988 return node
1995 return node
1989
1996
1990 if validatehash:
1997 if validatehash:
1991 self.checkhash(rawtext, node, p1=p1, p2=p2)
1998 self.checkhash(rawtext, node, p1=p1, p2=p2)
1992
1999
1993 return self.addrawrevision(
2000 return self.addrawrevision(
1994 rawtext,
2001 rawtext,
1995 transaction,
2002 transaction,
1996 link,
2003 link,
1997 p1,
2004 p1,
1998 p2,
2005 p2,
1999 node,
2006 node,
2000 flags,
2007 flags,
2001 cachedelta=cachedelta,
2008 cachedelta=cachedelta,
2002 deltacomputer=deltacomputer,
2009 deltacomputer=deltacomputer,
2003 )
2010 )
2004
2011
2005 def addrawrevision(
2012 def addrawrevision(
2006 self,
2013 self,
2007 rawtext,
2014 rawtext,
2008 transaction,
2015 transaction,
2009 link,
2016 link,
2010 p1,
2017 p1,
2011 p2,
2018 p2,
2012 node,
2019 node,
2013 flags,
2020 flags,
2014 cachedelta=None,
2021 cachedelta=None,
2015 deltacomputer=None,
2022 deltacomputer=None,
2016 ):
2023 ):
2017 """add a raw revision with known flags, node and parents
2024 """add a raw revision with known flags, node and parents
2018 useful when reusing a revision not stored in this revlog (ex: received
2025 useful when reusing a revision not stored in this revlog (ex: received
2019 over the wire, or read from an external bundle).
2026 over the wire, or read from an external bundle).
2020 """
2027 """
2021 dfh = None
2028 dfh = None
2022 if not self._inline:
2029 if not self._inline:
2023 dfh = self._datafp(b"a+")
2030 dfh = self._datafp(b"a+")
2024 ifh = self._indexfp(b"a+")
2031 ifh = self._indexfp(b"a+")
2025 try:
2032 try:
2026 return self._addrevision(
2033 return self._addrevision(
2027 node,
2034 node,
2028 rawtext,
2035 rawtext,
2029 transaction,
2036 transaction,
2030 link,
2037 link,
2031 p1,
2038 p1,
2032 p2,
2039 p2,
2033 flags,
2040 flags,
2034 cachedelta,
2041 cachedelta,
2035 ifh,
2042 ifh,
2036 dfh,
2043 dfh,
2037 deltacomputer=deltacomputer,
2044 deltacomputer=deltacomputer,
2038 )
2045 )
2039 finally:
2046 finally:
2040 if dfh:
2047 if dfh:
2041 dfh.close()
2048 dfh.close()
2042 ifh.close()
2049 ifh.close()
2043
2050
2044 def compress(self, data):
2051 def compress(self, data):
2045 """Generate a possibly-compressed representation of data."""
2052 """Generate a possibly-compressed representation of data."""
2046 if not data:
2053 if not data:
2047 return b'', data
2054 return b'', data
2048
2055
2049 compressed = self._compressor.compress(data)
2056 compressed = self._compressor.compress(data)
2050
2057
2051 if compressed:
2058 if compressed:
2052 # The revlog compressor added the header in the returned data.
2059 # The revlog compressor added the header in the returned data.
2053 return b'', compressed
2060 return b'', compressed
2054
2061
2055 if data[0:1] == b'\0':
2062 if data[0:1] == b'\0':
2056 return b'', data
2063 return b'', data
2057 return b'u', data
2064 return b'u', data
2058
2065
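# compress() returns a (header, data) pair that is concatenated when the
# chunk is written out: (b'', compressed) when the engine embeds its own
# header, (b'', data) for empty or NUL-prefixed chunks, and (b'u', data)
# for text stored uncompressed. A minimal sketch, assuming ``rl`` is an
# open revlog instance (hypothetical name):
#
#     header, payload = rl.compress(b'some revision text')
#     chunk = header + payload  # the bytes that land in the .i/.d file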
2059 def decompress(self, data):
2066 def decompress(self, data):
2060 """Decompress a revlog chunk.
2067 """Decompress a revlog chunk.
2061
2068
2062 The chunk is expected to begin with a header identifying the
2069 The chunk is expected to begin with a header identifying the
2063 format type so it can be routed to an appropriate decompressor.
2070 format type so it can be routed to an appropriate decompressor.
2064 """
2071 """
2065 if not data:
2072 if not data:
2066 return data
2073 return data
2067
2074
2068 # Revlogs are read much more frequently than they are written and many
2075 # Revlogs are read much more frequently than they are written and many
2069 # chunks only take microseconds to decompress, so performance is
2076 # chunks only take microseconds to decompress, so performance is
2070 # important here.
2077 # important here.
2071 #
2078 #
2072 # We can make a few assumptions about revlogs:
2079 # We can make a few assumptions about revlogs:
2073 #
2080 #
2074 # 1) the majority of chunks will be compressed (as opposed to inline
2081 # 1) the majority of chunks will be compressed (as opposed to inline
2075 # raw data).
2082 # raw data).
2076 # 2) decompressing *any* data will likely be at least 10x slower than
2083 # 2) decompressing *any* data will likely be at least 10x slower than
2077 # returning raw inline data.
2084 # returning raw inline data.
2078 # 3) we want to prioritize common and officially supported compression
2085 # 3) we want to prioritize common and officially supported compression
2079 # engines
2086 # engines
2080 #
2087 #
2081 # It follows that we want to optimize for "decompress compressed data
2088 # It follows that we want to optimize for "decompress compressed data
2082 # when encoded with common and officially supported compression engines"
2089 # when encoded with common and officially supported compression engines"
2083 # case over "raw data" and "data encoded by less common or non-official
2090 # case over "raw data" and "data encoded by less common or non-official
2084 # compression engines." That is why we have the inline lookup first
2091 # compression engines." That is why we have the inline lookup first
2085 # followed by the compengines lookup.
2092 # followed by the compengines lookup.
2086 #
2093 #
2087 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2094 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2088 # compressed chunks. And this matters for changelog and manifest reads.
2095 # compressed chunks. And this matters for changelog and manifest reads.
2089 t = data[0:1]
2096 t = data[0:1]
2090
2097
2091 if t == b'x':
2098 if t == b'x':
2092 try:
2099 try:
2093 return _zlibdecompress(data)
2100 return _zlibdecompress(data)
2094 except zlib.error as e:
2101 except zlib.error as e:
2095 raise error.RevlogError(
2102 raise error.RevlogError(
2096 _(b'revlog decompress error: %s')
2103 _(b'revlog decompress error: %s')
2097 % stringutil.forcebytestr(e)
2104 % stringutil.forcebytestr(e)
2098 )
2105 )
2099 # '\0' is more common than 'u' so it goes first.
2106 # '\0' is more common than 'u' so it goes first.
2100 elif t == b'\0':
2107 elif t == b'\0':
2101 return data
2108 return data
2102 elif t == b'u':
2109 elif t == b'u':
2103 return util.buffer(data, 1)
2110 return util.buffer(data, 1)
2104
2111
2105 try:
2112 try:
2106 compressor = self._decompressors[t]
2113 compressor = self._decompressors[t]
2107 except KeyError:
2114 except KeyError:
2108 try:
2115 try:
2109 engine = util.compengines.forrevlogheader(t)
2116 engine = util.compengines.forrevlogheader(t)
2110 compressor = engine.revlogcompressor(self._compengineopts)
2117 compressor = engine.revlogcompressor(self._compengineopts)
2111 self._decompressors[t] = compressor
2118 self._decompressors[t] = compressor
2112 except KeyError:
2119 except KeyError:
2113 raise error.RevlogError(_(b'unknown compression type %r') % t)
2120 raise error.RevlogError(_(b'unknown compression type %r') % t)
2114
2121
2115 return compressor.decompress(data)
2122 return compressor.decompress(data)
2116
2123
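# Header-byte dispatch implemented above:
#
#     b'x'   zlib-compressed chunk
#     b'\0'  raw data stored verbatim
#     b'u'   uncompressed data behind a one-byte marker
#     other  resolved through util.compengines.forrevlogheader()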
2117 def _addrevision(
2124 def _addrevision(
2118 self,
2125 self,
2119 node,
2126 node,
2120 rawtext,
2127 rawtext,
2121 transaction,
2128 transaction,
2122 link,
2129 link,
2123 p1,
2130 p1,
2124 p2,
2131 p2,
2125 flags,
2132 flags,
2126 cachedelta,
2133 cachedelta,
2127 ifh,
2134 ifh,
2128 dfh,
2135 dfh,
2129 alwayscache=False,
2136 alwayscache=False,
2130 deltacomputer=None,
2137 deltacomputer=None,
2131 ):
2138 ):
2132 """internal function to add revisions to the log
2139 """internal function to add revisions to the log
2133
2140
2134 see addrevision for argument descriptions.
2141 see addrevision for argument descriptions.
2135
2142
2136 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2137
2144
2138 if "deltacomputer" is not provided or None, a default deltacomputer will
2145 if "deltacomputer" is not provided or None, a default deltacomputer will
2139 be used.
2146 be used.
2140
2147
2141 invariants:
2148 invariants:
2142 - rawtext is optional (can be None); if not set, cachedelta must be set.
2149 - rawtext is optional (can be None); if not set, cachedelta must be set.
2143 if both are set, they must correspond to each other.
2150 if both are set, they must correspond to each other.
2144 """
2151 """
2145 if node == nullid:
2152 if node == nullid:
2146 raise error.RevlogError(
2153 raise error.RevlogError(
2147 _(b"%s: attempt to add null revision") % self.indexfile
2154 _(b"%s: attempt to add null revision") % self.indexfile
2148 )
2155 )
2149 if node == wdirid or node in wdirfilenodeids:
2156 if node == wdirid or node in wdirfilenodeids:
2150 raise error.RevlogError(
2157 raise error.RevlogError(
2151 _(b"%s: attempt to add wdir revision") % self.indexfile
2158 _(b"%s: attempt to add wdir revision") % self.indexfile
2152 )
2159 )
2153
2160
2154 if self._inline:
2161 if self._inline:
2155 fh = ifh
2162 fh = ifh
2156 else:
2163 else:
2157 fh = dfh
2164 fh = dfh
2158
2165
2159 btext = [rawtext]
2166 btext = [rawtext]
2160
2167
2161 curr = len(self)
2168 curr = len(self)
2162 prev = curr - 1
2169 prev = curr - 1
2163 offset = self.end(prev)
2170 offset = self.end(prev)
2164 p1r, p2r = self.rev(p1), self.rev(p2)
2171 p1r, p2r = self.rev(p1), self.rev(p2)
2165
2172
2166 # full versions are inserted when the needed deltas
2173 # full versions are inserted when the needed deltas
2167 # become comparable to the uncompressed text
2174 # become comparable to the uncompressed text
2168 if rawtext is None:
2175 if rawtext is None:
2169 # need rawtext size, before changed by flag processors, which is
2176 # need rawtext size, before changed by flag processors, which is
2170 # the non-raw size. use revlog explicitly to avoid filelog's extra
2177 # the non-raw size. use revlog explicitly to avoid filelog's extra
2171 # logic that might remove metadata size.
2178 # logic that might remove metadata size.
2172 textlen = mdiff.patchedsize(
2179 textlen = mdiff.patchedsize(
2173 revlog.size(self, cachedelta[0]), cachedelta[1]
2180 revlog.size(self, cachedelta[0]), cachedelta[1]
2174 )
2181 )
2175 else:
2182 else:
2176 textlen = len(rawtext)
2183 textlen = len(rawtext)
2177
2184
2178 if deltacomputer is None:
2185 if deltacomputer is None:
2179 deltacomputer = deltautil.deltacomputer(self)
2186 deltacomputer = deltautil.deltacomputer(self)
2180
2187
2181 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2188 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2182
2189
2183 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2190 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2184
2191
2185 e = (
2192 e = (
2186 offset_type(offset, flags),
2193 offset_type(offset, flags),
2187 deltainfo.deltalen,
2194 deltainfo.deltalen,
2188 textlen,
2195 textlen,
2189 deltainfo.base,
2196 deltainfo.base,
2190 link,
2197 link,
2191 p1r,
2198 p1r,
2192 p2r,
2199 p2r,
2193 node,
2200 node,
2194 )
2201 )
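# Field layout of the index entry tuple built above: packed offset+flags,
# compressed (delta) length, raw text length, delta base revision,
# linkrev, parent-1 revision, parent-2 revision, node id.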
2195 self.index.append(e)
2202 self.index.append(e)
2196
2203
2197 # Reset the pure node cache start lookup offset to account for new
2204 # Reset the pure node cache start lookup offset to account for new
2198 # revision.
2205 # revision.
2199 if self._nodepos is not None:
2206 if self._nodepos is not None:
2200 self._nodepos = curr
2207 self._nodepos = curr
2201
2208
2202 entry = self._io.packentry(e, self.node, self.version, curr)
2209 entry = self._io.packentry(e, self.node, self.version, curr)
2203 self._writeentry(
2210 self._writeentry(
2204 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2211 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2205 )
2212 )
2206
2213
2207 rawtext = btext[0]
2214 rawtext = btext[0]
2208
2215
2209 if alwayscache and rawtext is None:
2216 if alwayscache and rawtext is None:
2210 rawtext = deltacomputer.buildtext(revinfo, fh)
2217 rawtext = deltacomputer.buildtext(revinfo, fh)
2211
2218
2212 if type(rawtext) == bytes: # only accept immutable objects
2219 if type(rawtext) == bytes: # only accept immutable objects
2213 self._revisioncache = (node, curr, rawtext)
2220 self._revisioncache = (node, curr, rawtext)
2214 self._chainbasecache[curr] = deltainfo.chainbase
2221 self._chainbasecache[curr] = deltainfo.chainbase
2215 return node
2222 return node
2216
2223
2217 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2224 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2218 # Files opened in a+ mode have inconsistent behavior on various
2225 # Files opened in a+ mode have inconsistent behavior on various
2219 # platforms. Windows requires that a file positioning call be made
2226 # platforms. Windows requires that a file positioning call be made
2220 # when the file handle transitions between reads and writes. See
2227 # when the file handle transitions between reads and writes. See
2221 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2228 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2222 # platforms, Python or the platform itself can be buggy. Some versions
2229 # platforms, Python or the platform itself can be buggy. Some versions
2223 # of Solaris have been observed to not append at the end of the file
2230 # of Solaris have been observed to not append at the end of the file
2224 # if the file was seeked to before the end. See issue4943 for more.
2231 # if the file was seeked to before the end. See issue4943 for more.
2225 #
2232 #
2226 # We work around this issue by inserting a seek() before writing.
2233 # We work around this issue by inserting a seek() before writing.
2227 # Note: This is likely not necessary on Python 3. However, because
2234 # Note: This is likely not necessary on Python 3. However, because
2228 # the file handle is reused for reads and may be seeked there, we need
2235 # the file handle is reused for reads and may be seeked there, we need
2229 # to be careful before changing this.
2236 # to be careful before changing this.
2230 ifh.seek(0, os.SEEK_END)
2237 ifh.seek(0, os.SEEK_END)
2231 if dfh:
2238 if dfh:
2232 dfh.seek(0, os.SEEK_END)
2239 dfh.seek(0, os.SEEK_END)
2233
2240
2234 curr = len(self) - 1
2241 curr = len(self) - 1
2235 if not self._inline:
2242 if not self._inline:
2236 transaction.add(self.datafile, offset)
2243 transaction.add(self.datafile, offset)
2237 transaction.add(self.indexfile, curr * len(entry))
2244 transaction.add(self.indexfile, curr * len(entry))
2238 if data[0]:
2245 if data[0]:
2239 dfh.write(data[0])
2246 dfh.write(data[0])
2240 dfh.write(data[1])
2247 dfh.write(data[1])
2241 ifh.write(entry)
2248 ifh.write(entry)
2242 else:
2249 else:
2243 offset += curr * self._io.size
2250 offset += curr * self._io.size
2244 transaction.add(self.indexfile, offset, curr)
2251 transaction.add(self.indexfile, offset, curr)
2245 ifh.write(entry)
2252 ifh.write(entry)
2246 ifh.write(data[0])
2253 ifh.write(data[0])
2247 ifh.write(data[1])
2254 ifh.write(data[1])
2248 self._enforceinlinesize(transaction, ifh)
2255 self._enforceinlinesize(transaction, ifh)
2249
2256
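# Two on-disk layouts are served above: non-inline revlogs append the
# delta payload to the data file and the entry to the index file, while
# inline revlogs interleave each entry with its data inside the index
# file, which is why the data offset is shifted by curr * self._io.size.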
2250 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2257 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2251 """
2258 """
2252 add a delta group
2259 add a delta group
2253
2260
2254 given a set of deltas, add them to the revision log. the
2261 given a set of deltas, add them to the revision log. the
2255 first delta is against its parent, which should be in our
2262 first delta is against its parent, which should be in our
2256 log, the rest are against the previous delta.
2263 log, the rest are against the previous delta.
2257
2264
2258 If ``addrevisioncb`` is defined, it will be called with arguments of
2265 If ``addrevisioncb`` is defined, it will be called with arguments of
2259 this revlog and the node that was added.
2266 this revlog and the node that was added.
2260 """
2267 """
2261
2268
2262 if self._writinghandles:
2269 if self._writinghandles:
2263 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2270 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2264
2271
2265 nodes = []
2272 nodes = []
2266
2273
2267 r = len(self)
2274 r = len(self)
2268 end = 0
2275 end = 0
2269 if r:
2276 if r:
2270 end = self.end(r - 1)
2277 end = self.end(r - 1)
2271 ifh = self._indexfp(b"a+")
2278 ifh = self._indexfp(b"a+")
2272 isize = r * self._io.size
2279 isize = r * self._io.size
2273 if self._inline:
2280 if self._inline:
2274 transaction.add(self.indexfile, end + isize, r)
2281 transaction.add(self.indexfile, end + isize, r)
2275 dfh = None
2282 dfh = None
2276 else:
2283 else:
2277 transaction.add(self.indexfile, isize, r)
2284 transaction.add(self.indexfile, isize, r)
2278 transaction.add(self.datafile, end)
2285 transaction.add(self.datafile, end)
2279 dfh = self._datafp(b"a+")
2286 dfh = self._datafp(b"a+")
2280
2287
2281 def flush():
2288 def flush():
2282 if dfh:
2289 if dfh:
2283 dfh.flush()
2290 dfh.flush()
2284 ifh.flush()
2291 ifh.flush()
2285
2292
2286 self._writinghandles = (ifh, dfh)
2293 self._writinghandles = (ifh, dfh)
2287
2294
2288 try:
2295 try:
2289 deltacomputer = deltautil.deltacomputer(self)
2296 deltacomputer = deltautil.deltacomputer(self)
2290 # loop through our set of deltas
2297 # loop through our set of deltas
2291 for data in deltas:
2298 for data in deltas:
2292 node, p1, p2, linknode, deltabase, delta, flags = data
2299 node, p1, p2, linknode, deltabase, delta, flags = data
2293 link = linkmapper(linknode)
2300 link = linkmapper(linknode)
2294 flags = flags or REVIDX_DEFAULT_FLAGS
2301 flags = flags or REVIDX_DEFAULT_FLAGS
2295
2302
2296 nodes.append(node)
2303 nodes.append(node)
2297
2304
2298 if node in self.nodemap:
2305 if node in self.nodemap:
2299 self._nodeduplicatecallback(transaction, node)
2306 self._nodeduplicatecallback(transaction, node)
2300 # this can happen if two branches make the same change
2307 # this can happen if two branches make the same change
2301 continue
2308 continue
2302
2309
2303 for p in (p1, p2):
2310 for p in (p1, p2):
2304 if p not in self.nodemap:
2311 if p not in self.nodemap:
2305 raise error.LookupError(
2312 raise error.LookupError(
2306 p, self.indexfile, _(b'unknown parent')
2313 p, self.indexfile, _(b'unknown parent')
2307 )
2314 )
2308
2315
2309 if deltabase not in self.nodemap:
2316 if deltabase not in self.nodemap:
2310 raise error.LookupError(
2317 raise error.LookupError(
2311 deltabase, self.indexfile, _(b'unknown delta base')
2318 deltabase, self.indexfile, _(b'unknown delta base')
2312 )
2319 )
2313
2320
2314 baserev = self.rev(deltabase)
2321 baserev = self.rev(deltabase)
2315
2322
2316 if baserev != nullrev and self.iscensored(baserev):
2323 if baserev != nullrev and self.iscensored(baserev):
2317 # if base is censored, delta must be full replacement in a
2324 # if base is censored, delta must be full replacement in a
2318 # single patch operation
2325 # single patch operation
2319 hlen = struct.calcsize(b">lll")
2326 hlen = struct.calcsize(b">lll")
2320 oldlen = self.rawsize(baserev)
2327 oldlen = self.rawsize(baserev)
2321 newlen = len(delta) - hlen
2328 newlen = len(delta) - hlen
2322 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2329 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2323 raise error.CensoredBaseError(
2330 raise error.CensoredBaseError(
2324 self.indexfile, self.node(baserev)
2331 self.indexfile, self.node(baserev)
2325 )
2332 )
2326
2333
2327 if not flags and self._peek_iscensored(baserev, delta, flush):
2334 if not flags and self._peek_iscensored(baserev, delta, flush):
2328 flags |= REVIDX_ISCENSORED
2335 flags |= REVIDX_ISCENSORED
2329
2336
2330 # We assume consumers of addrevisioncb will want to retrieve
2337 # We assume consumers of addrevisioncb will want to retrieve
2331 # the added revision, which will require a call to
2338 # the added revision, which will require a call to
2332 # revision(). revision() will fast path if there is a cache
2339 # revision(). revision() will fast path if there is a cache
2333 # hit. So, we tell _addrevision() to always cache in this case.
2340 # hit. So, we tell _addrevision() to always cache in this case.
2334 # We're only using addgroup() in the context of changegroup
2341 # We're only using addgroup() in the context of changegroup
2335 # generation so the revision data can always be handled as raw
2342 # generation so the revision data can always be handled as raw
2336 # by the flagprocessor.
2343 # by the flagprocessor.
2337 self._addrevision(
2344 self._addrevision(
2338 node,
2345 node,
2339 None,
2346 None,
2340 transaction,
2347 transaction,
2341 link,
2348 link,
2342 p1,
2349 p1,
2343 p2,
2350 p2,
2344 flags,
2351 flags,
2345 (baserev, delta),
2352 (baserev, delta),
2346 ifh,
2353 ifh,
2347 dfh,
2354 dfh,
2348 alwayscache=bool(addrevisioncb),
2355 alwayscache=bool(addrevisioncb),
2349 deltacomputer=deltacomputer,
2356 deltacomputer=deltacomputer,
2350 )
2357 )
2351
2358
2352 if addrevisioncb:
2359 if addrevisioncb:
2353 addrevisioncb(self, node)
2360 addrevisioncb(self, node)
2354
2361
2355 if not dfh and not self._inline:
2362 if not dfh and not self._inline:
2356 # addrevision switched from inline to conventional
2363 # addrevision switched from inline to conventional
2357 # reopen the index
2364 # reopen the index
2358 ifh.close()
2365 ifh.close()
2359 dfh = self._datafp(b"a+")
2366 dfh = self._datafp(b"a+")
2360 ifh = self._indexfp(b"a+")
2367 ifh = self._indexfp(b"a+")
2361 self._writinghandles = (ifh, dfh)
2368 self._writinghandles = (ifh, dfh)
2362 finally:
2369 finally:
2363 self._writinghandles = None
2370 self._writinghandles = None
2364
2371
2365 if dfh:
2372 if dfh:
2366 dfh.close()
2373 dfh.close()
2367 ifh.close()
2374 ifh.close()
2368
2375
2369 return nodes
2376 return nodes
2370
2377
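# Each element of ``deltas`` consumed above is a 7-tuple of
# (node, p1, p2, linknode, deltabase, delta, flags), e.g. as yielded by
# a changegroup unpacker's deltaiter().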
2371 def iscensored(self, rev):
2378 def iscensored(self, rev):
2372 """Check if a file revision is censored."""
2379 """Check if a file revision is censored."""
2373 if not self._censorable:
2380 if not self._censorable:
2374 return False
2381 return False
2375
2382
2376 return self.flags(rev) & REVIDX_ISCENSORED
2383 return self.flags(rev) & REVIDX_ISCENSORED
2377
2384
2378 def _peek_iscensored(self, baserev, delta, flush):
2385 def _peek_iscensored(self, baserev, delta, flush):
2379 """Quickly check if a delta produces a censored revision."""
2386 """Quickly check if a delta produces a censored revision."""
2380 if not self._censorable:
2387 if not self._censorable:
2381 return False
2388 return False
2382
2389
2383 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2390 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2384
2391
2385 def getstrippoint(self, minlink):
2392 def getstrippoint(self, minlink):
2386 """find the minimum rev that must be stripped to strip the linkrev
2393 """find the minimum rev that must be stripped to strip the linkrev
2387
2394
2388 Returns a tuple containing the minimum rev and a set of all revs that
2395 Returns a tuple containing the minimum rev and a set of all revs that
2389 have linkrevs that will be broken by this strip.
2396 have linkrevs that will be broken by this strip.
2390 """
2397 """
2391 return storageutil.resolvestripinfo(
2398 return storageutil.resolvestripinfo(
2392 minlink,
2399 minlink,
2393 len(self) - 1,
2400 len(self) - 1,
2394 self.headrevs(),
2401 self.headrevs(),
2395 self.linkrev,
2402 self.linkrev,
2396 self.parentrevs,
2403 self.parentrevs,
2397 )
2404 )
2398
2405
2399 def strip(self, minlink, transaction):
2406 def strip(self, minlink, transaction):
2400 """truncate the revlog on the first revision with a linkrev >= minlink
2407 """truncate the revlog on the first revision with a linkrev >= minlink
2401
2408
2402 This function is called when we're stripping revision minlink and
2409 This function is called when we're stripping revision minlink and
2403 its descendants from the repository.
2410 its descendants from the repository.
2404
2411
2405 We have to remove all revisions with linkrev >= minlink, because
2412 We have to remove all revisions with linkrev >= minlink, because
2406 the equivalent changelog revisions will be renumbered after the
2413 the equivalent changelog revisions will be renumbered after the
2407 strip.
2414 strip.
2408
2415
2409 So we truncate the revlog on the first of these revisions, and
2416 So we truncate the revlog on the first of these revisions, and
2410 trust that the caller has saved the revisions that shouldn't be
2417 trust that the caller has saved the revisions that shouldn't be
2411 removed and that it'll re-add them after this truncation.
2418 removed and that it'll re-add them after this truncation.
2412 """
2419 """
2413 if len(self) == 0:
2420 if len(self) == 0:
2414 return
2421 return
2415
2422
2416 rev, _ = self.getstrippoint(minlink)
2423 rev, _ = self.getstrippoint(minlink)
2417 if rev == len(self):
2424 if rev == len(self):
2418 return
2425 return
2419
2426
2420 # first truncate the files on disk
2427 # first truncate the files on disk
2421 end = self.start(rev)
2428 end = self.start(rev)
2422 if not self._inline:
2429 if not self._inline:
2423 transaction.add(self.datafile, end)
2430 transaction.add(self.datafile, end)
2424 end = rev * self._io.size
2431 end = rev * self._io.size
2425 else:
2432 else:
2426 end += rev * self._io.size
2433 end += rev * self._io.size
2427
2434
2428 transaction.add(self.indexfile, end)
2435 transaction.add(self.indexfile, end)
2429
2436
2430 # then reset internal state in memory to forget those revisions
2437 # then reset internal state in memory to forget those revisions
2431 self._revisioncache = None
2438 self._revisioncache = None
2432 self._chaininfocache = {}
2439 self._chaininfocache = {}
2433 self._chunkclear()
2440 self._chunkclear()
2434 for x in pycompat.xrange(rev, len(self)):
2435 del self.nodemap[self.node(x)]
2436
2441
2437 del self.index[rev:-1]
2442 del self.index[rev:-1]
2438 self._nodepos = None
2443 self._nodepos = None
2439
2444
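# Note: truncating self.index above now also invalidates the matching
# nodemap entries, replacing the explicit per-node deletion loop
# (old lines 2434-2435 in the left column) that this change removes.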
2440 def checksize(self):
2445 def checksize(self):
2441 """Check size of index and data files
2446 """Check size of index and data files
2442
2447
2443 return a (dd, di) tuple.
2448 return a (dd, di) tuple.
2444 - dd: extra bytes for the "data" file
2449 - dd: extra bytes for the "data" file
2445 - di: extra bytes for the "index" file
2450 - di: extra bytes for the "index" file
2446
2451
2447 A healthy revlog will return (0, 0).
2452 A healthy revlog will return (0, 0).
2448 """
2453 """
2449 expected = 0
2454 expected = 0
2450 if len(self):
2455 if len(self):
2451 expected = max(0, self.end(len(self) - 1))
2456 expected = max(0, self.end(len(self) - 1))
2452
2457
2453 try:
2458 try:
2454 with self._datafp() as f:
2459 with self._datafp() as f:
2455 f.seek(0, io.SEEK_END)
2460 f.seek(0, io.SEEK_END)
2456 actual = f.tell()
2461 actual = f.tell()
2457 dd = actual - expected
2462 dd = actual - expected
2458 except IOError as inst:
2463 except IOError as inst:
2459 if inst.errno != errno.ENOENT:
2464 if inst.errno != errno.ENOENT:
2460 raise
2465 raise
2461 dd = 0
2466 dd = 0
2462
2467
2463 try:
2468 try:
2464 f = self.opener(self.indexfile)
2469 f = self.opener(self.indexfile)
2465 f.seek(0, io.SEEK_END)
2470 f.seek(0, io.SEEK_END)
2466 actual = f.tell()
2471 actual = f.tell()
2467 f.close()
2472 f.close()
2468 s = self._io.size
2473 s = self._io.size
2469 i = max(0, actual // s)
2474 i = max(0, actual // s)
2470 di = actual - (i * s)
2475 di = actual - (i * s)
2471 if self._inline:
2476 if self._inline:
2472 databytes = 0
2477 databytes = 0
2473 for r in self:
2478 for r in self:
2474 databytes += max(0, self.length(r))
2479 databytes += max(0, self.length(r))
2475 dd = 0
2480 dd = 0
2476 di = actual - len(self) * s - databytes
2481 di = actual - len(self) * s - databytes
2477 except IOError as inst:
2482 except IOError as inst:
2478 if inst.errno != errno.ENOENT:
2483 if inst.errno != errno.ENOENT:
2479 raise
2484 raise
2480 di = 0
2485 di = 0
2481
2486
2482 return (dd, di)
2487 return (dd, di)
2483
2488
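# A worked example of the accounting above, assuming the v1 format's
# 64-byte index entries: a non-inline index file of 6432 bytes yields
# i = 6432 // 64 = 100 complete entries and di = 6432 - 100 * 64 = 32
# extra bytes, i.e. a truncated trailing entry for verify to report.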
2484 def files(self):
2489 def files(self):
2485 res = [self.indexfile]
2490 res = [self.indexfile]
2486 if not self._inline:
2491 if not self._inline:
2487 res.append(self.datafile)
2492 res.append(self.datafile)
2488 return res
2493 return res
2489
2494
2490 def emitrevisions(
2495 def emitrevisions(
2491 self,
2496 self,
2492 nodes,
2497 nodes,
2493 nodesorder=None,
2498 nodesorder=None,
2494 revisiondata=False,
2499 revisiondata=False,
2495 assumehaveparentrevisions=False,
2500 assumehaveparentrevisions=False,
2496 deltamode=repository.CG_DELTAMODE_STD,
2501 deltamode=repository.CG_DELTAMODE_STD,
2497 ):
2502 ):
2498 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2503 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2499 raise error.ProgrammingError(
2504 raise error.ProgrammingError(
2500 b'unhandled value for nodesorder: %s' % nodesorder
2505 b'unhandled value for nodesorder: %s' % nodesorder
2501 )
2506 )
2502
2507
2503 if nodesorder is None and not self._generaldelta:
2508 if nodesorder is None and not self._generaldelta:
2504 nodesorder = b'storage'
2509 nodesorder = b'storage'
2505
2510
2506 if (
2511 if (
2507 not self._storedeltachains
2512 not self._storedeltachains
2508 and deltamode != repository.CG_DELTAMODE_PREV
2513 and deltamode != repository.CG_DELTAMODE_PREV
2509 ):
2514 ):
2510 deltamode = repository.CG_DELTAMODE_FULL
2515 deltamode = repository.CG_DELTAMODE_FULL
2511
2516
2512 return storageutil.emitrevisions(
2517 return storageutil.emitrevisions(
2513 self,
2518 self,
2514 nodes,
2519 nodes,
2515 nodesorder,
2520 nodesorder,
2516 revlogrevisiondelta,
2521 revlogrevisiondelta,
2517 deltaparentfn=self.deltaparent,
2522 deltaparentfn=self.deltaparent,
2518 candeltafn=self.candelta,
2523 candeltafn=self.candelta,
2519 rawsizefn=self.rawsize,
2524 rawsizefn=self.rawsize,
2520 revdifffn=self.revdiff,
2525 revdifffn=self.revdiff,
2521 flagsfn=self.flags,
2526 flagsfn=self.flags,
2522 deltamode=deltamode,
2527 deltamode=deltamode,
2523 revisiondata=revisiondata,
2528 revisiondata=revisiondata,
2524 assumehaveparentrevisions=assumehaveparentrevisions,
2529 assumehaveparentrevisions=assumehaveparentrevisions,
2525 )
2530 )
2526
2531
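# Note the deltamode fallback above: a revlog configured not to store
# delta chains can only emit full texts, so any requested mode other
# than CG_DELTAMODE_PREV is downgraded to CG_DELTAMODE_FULL.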
2527 DELTAREUSEALWAYS = b'always'
2532 DELTAREUSEALWAYS = b'always'
2528 DELTAREUSESAMEREVS = b'samerevs'
2533 DELTAREUSESAMEREVS = b'samerevs'
2529 DELTAREUSENEVER = b'never'
2534 DELTAREUSENEVER = b'never'
2530
2535
2531 DELTAREUSEFULLADD = b'fulladd'
2536 DELTAREUSEFULLADD = b'fulladd'
2532
2537
2533 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2538 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2534
2539
2535 def clone(
2540 def clone(
2536 self,
2541 self,
2537 tr,
2542 tr,
2538 destrevlog,
2543 destrevlog,
2539 addrevisioncb=None,
2544 addrevisioncb=None,
2540 deltareuse=DELTAREUSESAMEREVS,
2545 deltareuse=DELTAREUSESAMEREVS,
2541 forcedeltabothparents=None,
2546 forcedeltabothparents=None,
2542 sidedatacompanion=None,
2547 sidedatacompanion=None,
2543 ):
2548 ):
2544 """Copy this revlog to another, possibly with format changes.
2549 """Copy this revlog to another, possibly with format changes.
2545
2550
2546 The destination revlog will contain the same revisions and nodes.
2551 The destination revlog will contain the same revisions and nodes.
2547 However, it may not be bit-for-bit identical due to e.g. delta encoding
2552 However, it may not be bit-for-bit identical due to e.g. delta encoding
2548 differences.
2553 differences.
2549
2554
2550 The ``deltareuse`` argument controls how deltas from the existing revlog
2555 The ``deltareuse`` argument controls how deltas from the existing revlog
2551 are preserved in the destination revlog. The argument can have the
2556 are preserved in the destination revlog. The argument can have the
2552 following values:
2557 following values:
2553
2558
2554 DELTAREUSEALWAYS
2559 DELTAREUSEALWAYS
2555 Deltas will always be reused (if possible), even if the destination
2560 Deltas will always be reused (if possible), even if the destination
2556 revlog would not select the same revisions for the delta. This is the
2561 revlog would not select the same revisions for the delta. This is the
2557 fastest mode of operation.
2562 fastest mode of operation.
2558 DELTAREUSESAMEREVS
2563 DELTAREUSESAMEREVS
2559 Deltas will be reused if the destination revlog would pick the same
2564 Deltas will be reused if the destination revlog would pick the same
2560 revisions for the delta. This mode strikes a balance between speed
2565 revisions for the delta. This mode strikes a balance between speed
2561 and optimization.
2566 and optimization.
2562 DELTAREUSENEVER
2567 DELTAREUSENEVER
2563 Deltas will never be reused. This is the slowest mode of execution.
2568 Deltas will never be reused. This is the slowest mode of execution.
2564 This mode can be used to recompute deltas (e.g. if the diff/delta
2569 This mode can be used to recompute deltas (e.g. if the diff/delta
2565 algorithm changes).
2570 algorithm changes).
2566 DELTAREUSEFULLADD
2571 DELTAREUSEFULLADD
2567 Revisions will be re-added as if they were new content. This is
2572 Revisions will be re-added as if they were new content. This is
2568 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2573 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2569 e.g. large file detection and handling.
2574 e.g. large file detection and handling.
2570
2575
2571 Delta computation can be slow, so the choice of delta reuse policy can
2576 Delta computation can be slow, so the choice of delta reuse policy can
2572 significantly affect run time.
2577 significantly affect run time.
2573
2578
2574 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2579 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2575 two extremes. Deltas will be reused if they are appropriate. But if the
2580 two extremes. Deltas will be reused if they are appropriate. But if the
2576 delta could choose a better revision, it will do so. This means if you
2581 delta could choose a better revision, it will do so. This means if you
2577 are converting a non-generaldelta revlog to a generaldelta revlog,
2582 are converting a non-generaldelta revlog to a generaldelta revlog,
2578 deltas will be recomputed if the delta's parent isn't a parent of the
2583 deltas will be recomputed if the delta's parent isn't a parent of the
2579 revision.
2584 revision.
2580
2585
2581 In addition to the delta policy, the ``forcedeltabothparents``
2586 In addition to the delta policy, the ``forcedeltabothparents``
2582 argument controls whether to force compute deltas against both parents
2587 argument controls whether to force compute deltas against both parents
2583 for merges. If None, the destination revlog's existing setting is kept.
2588 for merges. If None, the destination revlog's existing setting is kept.
2584
2589
2585 If not None, the `sidedatacompanion` is a callable that accepts two
2590 If not None, the `sidedatacompanion` is a callable that accepts two
2586 arguments:
2591 arguments:
2587
2592
2588 (srcrevlog, rev)
2593 (srcrevlog, rev)
2589
2594
2590 and returns a triplet that controls changes to sidedata content from the
2595 and returns a triplet that controls changes to sidedata content from the
2591 old revision to the new clone result:
2596 old revision to the new clone result:
2592
2597
2593 (dropall, filterout, update)
2598 (dropall, filterout, update)
2594
2599
2595 * if `dropall` is True, all sidedata should be dropped
2600 * if `dropall` is True, all sidedata should be dropped
2596 * `filterout` is a set of sidedata keys that should be dropped
2601 * `filterout` is a set of sidedata keys that should be dropped
2597 * `update` is a mapping of additional/new key -> value
2602 * `update` is a mapping of additional/new key -> value
2598 """
2603 """
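# A hypothetical invocation, assuming ``src`` and ``dst`` are revlog
# instances and ``tr`` is an open transaction:
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
#
# which reuses a stored delta only when ``dst`` would pick the same
# delta base on its own.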
2599 if deltareuse not in self.DELTAREUSEALL:
2604 if deltareuse not in self.DELTAREUSEALL:
2600 raise ValueError(
2605 raise ValueError(
2601 _(b'value for deltareuse invalid: %s') % deltareuse
2606 _(b'value for deltareuse invalid: %s') % deltareuse
2602 )
2607 )
2603
2608
2604 if len(destrevlog):
2609 if len(destrevlog):
2605 raise ValueError(_(b'destination revlog is not empty'))
2610 raise ValueError(_(b'destination revlog is not empty'))
2606
2611
2607 if getattr(self, 'filteredrevs', None):
2612 if getattr(self, 'filteredrevs', None):
2608 raise ValueError(_(b'source revlog has filtered revisions'))
2613 raise ValueError(_(b'source revlog has filtered revisions'))
2609 if getattr(destrevlog, 'filteredrevs', None):
2614 if getattr(destrevlog, 'filteredrevs', None):
2610 raise ValueError(_(b'destination revlog has filtered revisions'))
2615 raise ValueError(_(b'destination revlog has filtered revisions'))
2611
2616
2612 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2617 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2613 # if possible.
2618 # if possible.
2614 oldlazydelta = destrevlog._lazydelta
2619 oldlazydelta = destrevlog._lazydelta
2615 oldlazydeltabase = destrevlog._lazydeltabase
2620 oldlazydeltabase = destrevlog._lazydeltabase
2616 oldamd = destrevlog._deltabothparents
2621 oldamd = destrevlog._deltabothparents
2617
2622
2618 try:
2623 try:
2619 if deltareuse == self.DELTAREUSEALWAYS:
2624 if deltareuse == self.DELTAREUSEALWAYS:
2620 destrevlog._lazydeltabase = True
2625 destrevlog._lazydeltabase = True
2621 destrevlog._lazydelta = True
2626 destrevlog._lazydelta = True
2622 elif deltareuse == self.DELTAREUSESAMEREVS:
2627 elif deltareuse == self.DELTAREUSESAMEREVS:
2623 destrevlog._lazydeltabase = False
2628 destrevlog._lazydeltabase = False
2624 destrevlog._lazydelta = True
2629 destrevlog._lazydelta = True
2625 elif deltareuse == self.DELTAREUSENEVER:
2630 elif deltareuse == self.DELTAREUSENEVER:
2626 destrevlog._lazydeltabase = False
2631 destrevlog._lazydeltabase = False
2627 destrevlog._lazydelta = False
2632 destrevlog._lazydelta = False
2628
2633
2629 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2634 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2630
2635
2631 self._clone(
2636 self._clone(
2632 tr,
2637 tr,
2633 destrevlog,
2638 destrevlog,
2634 addrevisioncb,
2639 addrevisioncb,
2635 deltareuse,
2640 deltareuse,
2636 forcedeltabothparents,
2641 forcedeltabothparents,
2637 sidedatacompanion,
2642 sidedatacompanion,
2638 )
2643 )
2639
2644
2640 finally:
2645 finally:
2641 destrevlog._lazydelta = oldlazydelta
2646 destrevlog._lazydelta = oldlazydelta
2642 destrevlog._lazydeltabase = oldlazydeltabase
2647 destrevlog._lazydeltabase = oldlazydeltabase
2643 destrevlog._deltabothparents = oldamd
2648 destrevlog._deltabothparents = oldamd
2644
2649
2645 def _clone(
2650 def _clone(
2646 self,
2651 self,
2647 tr,
2652 tr,
2648 destrevlog,
2653 destrevlog,
2649 addrevisioncb,
2654 addrevisioncb,
2650 deltareuse,
2655 deltareuse,
2651 forcedeltabothparents,
2656 forcedeltabothparents,
2652 sidedatacompanion,
2657 sidedatacompanion,
2653 ):
2658 ):
2654 """perform the core duty of `revlog.clone` after parameter processing"""
2659 """perform the core duty of `revlog.clone` after parameter processing"""
2655 deltacomputer = deltautil.deltacomputer(destrevlog)
2660 deltacomputer = deltautil.deltacomputer(destrevlog)
2656 index = self.index
2661 index = self.index
2657 for rev in self:
2662 for rev in self:
2658 entry = index[rev]
2663 entry = index[rev]
2659
2664
2660 # Some classes override linkrev to take filtered revs into
2665 # Some classes override linkrev to take filtered revs into
2661 # account. Use raw entry from index.
2666 # account. Use raw entry from index.
2662 flags = entry[0] & 0xFFFF
2667 flags = entry[0] & 0xFFFF
2663 linkrev = entry[4]
2668 linkrev = entry[4]
2664 p1 = index[entry[5]][7]
2669 p1 = index[entry[5]][7]
2665 p2 = index[entry[6]][7]
2670 p2 = index[entry[6]][7]
2666 node = entry[7]
2671 node = entry[7]
2667
2672
2668 sidedataactions = (False, [], {})
2673 sidedataactions = (False, [], {})
2669 if sidedatacompanion is not None:
2674 if sidedatacompanion is not None:
2670 sidedataactions = sidedatacompanion(self, rev)
2675 sidedataactions = sidedatacompanion(self, rev)
2671
2676
2672 # (Possibly) reuse the delta from the revlog if allowed and
2677 # (Possibly) reuse the delta from the revlog if allowed and
2673 # the revlog chunk is a delta.
2678 # the revlog chunk is a delta.
2674 cachedelta = None
2679 cachedelta = None
2675 rawtext = None
2680 rawtext = None
2676 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2681 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2677 dropall, filterout, update = sidedataactions
2682 dropall, filterout, update = sidedataactions
2678 text, sidedata = self._revisiondata(rev)
2683 text, sidedata = self._revisiondata(rev)
2679 if dropall:
2684 if dropall:
2680 sidedata = {}
2685 sidedata = {}
2681 for key in filterout:
2686 for key in filterout:
2682 sidedata.pop(key, None)
2687 sidedata.pop(key, None)
2683 sidedata.update(update)
2688 sidedata.update(update)
2684 if not sidedata:
2689 if not sidedata:
2685 sidedata = None
2690 sidedata = None
2686 destrevlog.addrevision(
2691 destrevlog.addrevision(
2687 text,
2692 text,
2688 tr,
2693 tr,
2689 linkrev,
2694 linkrev,
2690 p1,
2695 p1,
2691 p2,
2696 p2,
2692 cachedelta=cachedelta,
2697 cachedelta=cachedelta,
2693 node=node,
2698 node=node,
2694 flags=flags,
2699 flags=flags,
2695 deltacomputer=deltacomputer,
2700 deltacomputer=deltacomputer,
2696 sidedata=sidedata,
2701 sidedata=sidedata,
2697 )
2702 )
2698 else:
2703 else:
2699 if destrevlog._lazydelta:
2704 if destrevlog._lazydelta:
2700 dp = self.deltaparent(rev)
2705 dp = self.deltaparent(rev)
2701 if dp != nullrev:
2706 if dp != nullrev:
2702 cachedelta = (dp, bytes(self._chunk(rev)))
2707 cachedelta = (dp, bytes(self._chunk(rev)))
2703
2708
2704 if not cachedelta:
2709 if not cachedelta:
2705 rawtext = self.rawdata(rev)
2710 rawtext = self.rawdata(rev)
2706
2711
2707 ifh = destrevlog.opener(
2712 ifh = destrevlog.opener(
2708 destrevlog.indexfile, b'a+', checkambig=False
2713 destrevlog.indexfile, b'a+', checkambig=False
2709 )
2714 )
2710 dfh = None
2715 dfh = None
2711 if not destrevlog._inline:
2716 if not destrevlog._inline:
2712 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2717 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2713 try:
2718 try:
2714 destrevlog._addrevision(
2719 destrevlog._addrevision(
2715 node,
2720 node,
2716 rawtext,
2721 rawtext,
2717 tr,
2722 tr,
2718 linkrev,
2723 linkrev,
2719 p1,
2724 p1,
2720 p2,
2725 p2,
2721 flags,
2726 flags,
2722 cachedelta,
2727 cachedelta,
2723 ifh,
2728 ifh,
2724 dfh,
2729 dfh,
2725 deltacomputer=deltacomputer,
2730 deltacomputer=deltacomputer,
2726 )
2731 )
2727 finally:
2732 finally:
2728 if dfh:
2733 if dfh:
2729 dfh.close()
2734 dfh.close()
2730 ifh.close()
2735 ifh.close()
2731
2736
2732 if addrevisioncb:
2737 if addrevisioncb:
2733 addrevisioncb(self, rev, node)
2738 addrevisioncb(self, rev, node)
2734
2739
2735 def censorrevision(self, tr, censornode, tombstone=b''):
2740 def censorrevision(self, tr, censornode, tombstone=b''):
2736 if (self.version & 0xFFFF) == REVLOGV0:
2741 if (self.version & 0xFFFF) == REVLOGV0:
2737 raise error.RevlogError(
2742 raise error.RevlogError(
2738 _(b'cannot censor with version %d revlogs') % self.version
2743 _(b'cannot censor with version %d revlogs') % self.version
2739 )
2744 )
2740
2745
2741 censorrev = self.rev(censornode)
2746 censorrev = self.rev(censornode)
2742 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2747 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2743
2748
2744 if len(tombstone) > self.rawsize(censorrev):
2749 if len(tombstone) > self.rawsize(censorrev):
2745 raise error.Abort(
2750 raise error.Abort(
2746 _(b'censor tombstone must be no longer than censored data')
2751 _(b'censor tombstone must be no longer than censored data')
2747 )
2752 )
2748
2753
2749 # Rewriting the revlog in place is hard. Our strategy for censoring is
2754 # Rewriting the revlog in place is hard. Our strategy for censoring is
2750 # to create a new revlog, copy all revisions to it, then replace the
2755 # to create a new revlog, copy all revisions to it, then replace the
2751 # revlogs on transaction close.
2756 # revlogs on transaction close.
2752
2757
2753 newindexfile = self.indexfile + b'.tmpcensored'
2758 newindexfile = self.indexfile + b'.tmpcensored'
2754 newdatafile = self.datafile + b'.tmpcensored'
2759 newdatafile = self.datafile + b'.tmpcensored'
2755
2760
2756 # This is a bit dangerous. We could easily have a mismatch of state.
2761 # This is a bit dangerous. We could easily have a mismatch of state.
2757 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2762 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2758 newrl.version = self.version
2763 newrl.version = self.version
2759 newrl._generaldelta = self._generaldelta
2764 newrl._generaldelta = self._generaldelta
2760 newrl._io = self._io
2765 newrl._io = self._io
2761
2766
2762 for rev in self.revs():
2767 for rev in self.revs():
2763 node = self.node(rev)
2768 node = self.node(rev)
2764 p1, p2 = self.parents(node)
2769 p1, p2 = self.parents(node)
2765
2770
2766 if rev == censorrev:
2771 if rev == censorrev:
2767 newrl.addrawrevision(
2772 newrl.addrawrevision(
2768 tombstone,
2773 tombstone,
2769 tr,
2774 tr,
2770 self.linkrev(censorrev),
2775 self.linkrev(censorrev),
2771 p1,
2776 p1,
2772 p2,
2777 p2,
2773 censornode,
2778 censornode,
2774 REVIDX_ISCENSORED,
2779 REVIDX_ISCENSORED,
2775 )
2780 )
2776
2781
2777 if newrl.deltaparent(rev) != nullrev:
2782 if newrl.deltaparent(rev) != nullrev:
2778 raise error.Abort(
2783 raise error.Abort(
2779 _(
2784 _(
2780 b'censored revision stored as delta; '
2785 b'censored revision stored as delta; '
2781 b'cannot censor'
2786 b'cannot censor'
2782 ),
2787 ),
2783 hint=_(
2788 hint=_(
2784 b'censoring of revlogs is not '
2789 b'censoring of revlogs is not '
2785 b'fully implemented; please report '
2790 b'fully implemented; please report '
2786 b'this bug'
2791 b'this bug'
2787 ),
2792 ),
2788 )
2793 )
2789 continue
2794 continue
2790
2795
2791 if self.iscensored(rev):
2796 if self.iscensored(rev):
2792 if self.deltaparent(rev) != nullrev:
2797 if self.deltaparent(rev) != nullrev:
2793 raise error.Abort(
2798 raise error.Abort(
2794 _(
2799 _(
2795 b'cannot censor due to censored '
2800 b'cannot censor due to censored '
2796 b'revision having delta stored'
2801 b'revision having delta stored'
2797 )
2802 )
2798 )
2803 )
2799 rawtext = self._chunk(rev)
2804 rawtext = self._chunk(rev)
2800 else:
2805 else:
2801 rawtext = self.rawdata(rev)
2806 rawtext = self.rawdata(rev)
2802
2807
2803 newrl.addrawrevision(
2808 newrl.addrawrevision(
2804 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2809 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2805 )
2810 )
2806
2811
2807 tr.addbackup(self.indexfile, location=b'store')
2812 tr.addbackup(self.indexfile, location=b'store')
2808 if not self._inline:
2813 if not self._inline:
2809 tr.addbackup(self.datafile, location=b'store')
2814 tr.addbackup(self.datafile, location=b'store')
2810
2815
2811 self.opener.rename(newrl.indexfile, self.indexfile)
2816 self.opener.rename(newrl.indexfile, self.indexfile)
2812 if not self._inline:
2817 if not self._inline:
2813 self.opener.rename(newrl.datafile, self.datafile)
2818 self.opener.rename(newrl.datafile, self.datafile)
2814
2819
2815 self.clearcaches()
2820 self.clearcaches()
2816 self._loadindex()
2821 self._loadindex()
2817
2822
2818 def verifyintegrity(self, state):
2823 def verifyintegrity(self, state):
2819 """Verifies the integrity of the revlog.
2824 """Verifies the integrity of the revlog.
2820
2825
2821 Yields ``revlogproblem`` instances describing problems that are
2826 Yields ``revlogproblem`` instances describing problems that are
2822 found.
2827 found.
2823 """
2828 """
2824 dd, di = self.checksize()
2829 dd, di = self.checksize()
2825 if dd:
2830 if dd:
2826 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2831 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2827 if di:
2832 if di:
2828 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2833 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2829
2834
2830 version = self.version & 0xFFFF
2835 version = self.version & 0xFFFF
2831
2836
2832 # The verifier tells us what version revlog we should be.
2837 # The verifier tells us what version revlog we should be.
2833 if version != state[b'expectedversion']:
2838 if version != state[b'expectedversion']:
2834 yield revlogproblem(
2839 yield revlogproblem(
2835 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2840 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2836 % (self.indexfile, version, state[b'expectedversion'])
2841 % (self.indexfile, version, state[b'expectedversion'])
2837 )
2842 )
2838
2843
2839 state[b'skipread'] = set()
2844 state[b'skipread'] = set()
2840
2845
2841 for rev in self:
2846 for rev in self:
2842 node = self.node(rev)
2847 node = self.node(rev)
2843
2848
2844 # Verify contents. 4 cases to care about:
2849 # Verify contents. 4 cases to care about:
2845 #
2850 #
2846 # common: the most common case
2851 # common: the most common case
2847 # rename: with a rename
2852 # rename: with a rename
2848 # meta: file content starts with b'\1\n', the metadata
2853 # meta: file content starts with b'\1\n', the metadata
2849 # header defined in filelog.py, but without a rename
2854 # header defined in filelog.py, but without a rename
2850 # ext: content stored externally
2855 # ext: content stored externally
2851 #
2856 #
2852 # More formally, their differences are shown below:
2857 # More formally, their differences are shown below:
2853 #
2858 #
2854 # | common | rename | meta | ext
2859 # | common | rename | meta | ext
2855 # -------------------------------------------------------
2860 # -------------------------------------------------------
2856 # flags() | 0 | 0 | 0 | not 0
2861 # flags() | 0 | 0 | 0 | not 0
2857 # renamed() | False | True | False | ?
2862 # renamed() | False | True | False | ?
2858 # rawtext[0:2]=='\1\n'| False | True | True | ?
2863 # rawtext[0:2]=='\1\n'| False | True | True | ?
2859 #
2864 #
2860 # "rawtext" means the raw text stored in revlog data, which
2865 # "rawtext" means the raw text stored in revlog data, which
2861 # could be retrieved by "rawdata(rev)". "text"
2866 # could be retrieved by "rawdata(rev)". "text"
2862 # mentioned below is "revision(rev)".
2867 # mentioned below is "revision(rev)".
2863 #
2868 #
2864 # There are 3 different lengths stored physically:
2869 # There are 3 different lengths stored physically:
2865 # 1. L1: rawsize, stored in revlog index
2870 # 1. L1: rawsize, stored in revlog index
2866 # 2. L2: len(rawtext), stored in revlog data
2871 # 2. L2: len(rawtext), stored in revlog data
2867 # 3. L3: len(text), stored in revlog data if flags==0, or
2872 # 3. L3: len(text), stored in revlog data if flags==0, or
2868 # possibly somewhere else if flags!=0
2873 # possibly somewhere else if flags!=0
2869 #
2874 #
2870 # L1 should be equal to L2. L3 could be different from them.
2875 # L1 should be equal to L2. L3 could be different from them.
2871 # "text" may or may not affect commit hash depending on flag
2876 # "text" may or may not affect commit hash depending on flag
2872 # processors (see flagutil.addflagprocessor).
2877 # processors (see flagutil.addflagprocessor).
2873 #
2878 #
2874 # | common | rename | meta | ext
2879 # | common | rename | meta | ext
2875 # -------------------------------------------------
2880 # -------------------------------------------------
2876 # rawsize() | L1 | L1 | L1 | L1
2881 # rawsize() | L1 | L1 | L1 | L1
2877 # size() | L1 | L2-LM | L1(*) | L1 (?)
2882 # size() | L1 | L2-LM | L1(*) | L1 (?)
2878 # len(rawtext) | L2 | L2 | L2 | L2
2883 # len(rawtext) | L2 | L2 | L2 | L2
2879 # len(text) | L2 | L2 | L2 | L3
2884 # len(text) | L2 | L2 | L2 | L3
2880 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2885 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2881 #
2886 #
2882 # LM: length of metadata, depending on rawtext
2887 # LM: length of metadata, depending on rawtext
2883 # (*): not ideal, see comment in filelog.size
2888 # (*): not ideal, see comment in filelog.size
2884 # (?): could be "- len(meta)" if the resolved content has
2889 # (?): could be "- len(meta)" if the resolved content has
2885 # rename metadata
2890 # rename metadata
2886 #
2891 #
2887 # Checks needed to be done:
2892 # Checks needed to be done:
2888 # 1. length check: L1 == L2, in all cases.
2893 # 1. length check: L1 == L2, in all cases.
2889 # 2. hash check: depending on flag processor, we may need to
2894 # 2. hash check: depending on flag processor, we may need to
2890 # use either "text" (external), or "rawtext" (in revlog).
2895 # use either "text" (external), or "rawtext" (in revlog).
2891
2896
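# For instance, in the "rename" column above the raw text looks like
#     b'\1\ncopy: a\ncopyrev: <hex node>\n\1\n<file content>'
# so len(rawtext) is L2 while read() strips the b'\1\n'-delimited
# metadata block, giving len(read()) = L2 - LM.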
2892 try:
2897 try:
2893 skipflags = state.get(b'skipflags', 0)
2898 skipflags = state.get(b'skipflags', 0)
2894 if skipflags:
2899 if skipflags:
2895 skipflags &= self.flags(rev)
2900 skipflags &= self.flags(rev)
2896
2901
2897 if skipflags:
2902 if skipflags:
2898 state[b'skipread'].add(node)
2903 state[b'skipread'].add(node)
2899 else:
2904 else:
2900 # Side-effect: read content and verify hash.
2905 # Side-effect: read content and verify hash.
2901 self.revision(node)
2906 self.revision(node)
2902
2907
2903 l1 = self.rawsize(rev)
2908 l1 = self.rawsize(rev)
2904 l2 = len(self.rawdata(node))
2909 l2 = len(self.rawdata(node))
2905
2910
2906 if l1 != l2:
2911 if l1 != l2:
2907 yield revlogproblem(
2912 yield revlogproblem(
2908 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2913 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2909 node=node,
2914 node=node,
2910 )
2915 )
2911
2916
2912 except error.CensoredNodeError:
2917 except error.CensoredNodeError:
2913 if state[b'erroroncensored']:
2918 if state[b'erroroncensored']:
2914 yield revlogproblem(
2919 yield revlogproblem(
2915 error=_(b'censored file data'), node=node
2920 error=_(b'censored file data'), node=node
2916 )
2921 )
2917 state[b'skipread'].add(node)
2922 state[b'skipread'].add(node)
2918 except Exception as e:
2923 except Exception as e:
2919 yield revlogproblem(
2924 yield revlogproblem(
2920 error=_(b'unpacking %s: %s')
2925 error=_(b'unpacking %s: %s')
2921 % (short(node), stringutil.forcebytestr(e)),
2926 % (short(node), stringutil.forcebytestr(e)),
2922 node=node,
2927 node=node,
2923 )
2928 )
2924 state[b'skipread'].add(node)
2929 state[b'skipread'].add(node)
2925
2930
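The length bookkeeping above can be made concrete with a toy example (plain
Python, not Mercurial code): assume a hypothetical flag processor that stores
content base64-encoded, so the resolved "text" (L3) differs in length from
the stored "rawtext" (L2), while the index-recorded rawsize (L1) must still
equal L2.

    import base64

    # pretend a flag processor stored the content base64-encoded
    rawtext = base64.b64encode(b'hello')  # physically stored bytes, L2 == 8
    text = base64.b64decode(rawtext)      # what revision() resolves to, L3 == 5

    l1 = len(rawtext)  # rawsize recorded in the index
    l2 = len(rawtext)
    l3 = len(text)

    assert l1 == l2  # the length check performed above
    assert l3 != l2  # flag processing may legitimately change the length
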
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    d = {}

    if exclusivefiles:
        d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
        if not self._inline:
            d[b'exclusivefiles'].append((self.opener, self.datafile))

    if sharedfiles:
        d[b'sharedfiles'] = []

    if revisionscount:
        d[b'revisionscount'] = len(self)

    if trackedsize:
        d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

    if storedsize:
        d[b'storedsize'] = sum(
            self.opener.stat(path).st_size for path in self.files()
        )

    return d
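
A hedged usage sketch for the method above: `rl` stands for any revlog
instance (hypothetical here), and the byte-string keys mirror the dictionary
built by storageinfo().

    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    print(info[b'revisionscount'])  # number of revisions in the revlog
    print(info[b'trackedsize'])     # sum of uncompressed rawsize() values
    print(info[b'storedsize'])      # bytes actually occupied on disk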
@@ -1,286 +1,285 @@
# unionrepo.py - repository class for viewing union of repository changesets
#
# Derived from bundlerepo.py
# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
# Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Repository class for "in-memory pull" of one local repository to another,
allowing operations like diff and log with revsets.
"""

from __future__ import absolute_import

from .i18n import _
from .pycompat import getattr

from . import (
    changelog,
    cmdutil,
    encoding,
    error,
    filelog,
    localrepo,
    manifest,
    mdiff,
    pathutil,
    revlog,
    util,
    vfs as vfsmod,
)

class unionrevlog(revlog.revlog):
    def __init__(self, opener, indexfile, revlog2, linkmapper):
        # How it works:
        # To retrieve a revision, we just need to know the node id so we
        # can look it up in revlog2.
        #
        # To differentiate a rev in the second revlog from a rev in the
        # revlog, we check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        revlog.revlog.__init__(self, opener, indexfile)
        self.revlog2 = revlog2

        n = len(self)
        self.repotiprev = n - 1
        self.bundlerevs = set()  # used by 'bundle()' revset expression
        for rev2 in self.revlog2:
            rev = self.revlog2.index[rev2]
            # rev numbers - in revlog2, very different from self.rev
            _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
            flags = _start & 0xFFFF

            if linkmapper is None:  # link is to same revlog
                assert linkrev == rev2  # we never link back
                link = n
            else:  # rev must be mapped from repo2 cl to unified cl by linkmapper
                link = linkmapper(linkrev)

            if linkmapper is not None:  # the delta base rev needs mapping too
                base = linkmapper(base)

            if node in self.nodemap:
                # this happens for the common revlog revisions
                self.bundlerevs.add(self.nodemap[node])
                continue

            p1node = self.revlog2.node(p1rev)
            p2node = self.revlog2.node(p2rev)

            # TODO: it's probably wrong to set compressed length to None,
            # but I have no idea if csize is valid in the base revlog
            # context.
            e = (
                flags,
                None,
                rsize,
                base,
                link,
                self.rev(p1node),
                self.rev(p2node),
                node,
            )
            self.index.append(e)
-            self.nodemap[node] = n
            self.bundlerevs.add(n)
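            # note: the explicit nodemap assignment above (marked "-") is
            # what this changeset removes; per the commit message, nodemap
            # deletion and bookkeeping now happen within the index itself, so
            # self.index.append(e) presumably keeps the node-to-rev mapping
            # current on its own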
            n += 1

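A minimal sketch (toy code, not part of this module) of the dispatch rule the
methods below all follow: revision numbers at or below repotiprev belong to
the local revlog, anything above is resolved by node in the second revlog.

    def pick_backend(rev, repotiprev):
        # mirrors the `rev <= self.repotiprev` tests in _chunk, revdiff,
        # and _revisiondata below
        return 'local' if rev <= repotiprev else 'revlog2'

    assert pick_backend(3, repotiprev=5) == 'local'
    assert pick_backend(9, repotiprev=5) == 'revlog2'
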
    def _chunk(self, rev):
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        return self.revlog2._chunk(self.node(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            return self.revlog2.revdiff(
                self.revlog2.rev(self.node(rev1)),
                self.revlog2.rev(self.node(rev2)),
            )
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return super(unionrevlog, self).revdiff(rev1, rev2)

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = self.rev(node)

        if rev > self.repotiprev:
            # work around manifestrevlog NOT being a revlog
            revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
            func = revlog2._revisiondata
        else:
            func = super(unionrevlog, self)._revisiondata
        return func(node, _df=_df, raw=raw)

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        maybemissingparents=False,
    ):
        raise NotImplementedError

    def strip(self, minlink, transaction):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError


class unionchangelog(unionrevlog, changelog.changelog):
    def __init__(self, opener, opener2):
        changelog.changelog.__init__(self, opener)
        linkmapper = None
        changelog2 = changelog.changelog(opener2)
        unionrevlog.__init__(
            self, opener, self.indexfile, changelog2, linkmapper
        )


class unionmanifest(unionrevlog, manifest.manifestrevlog):
    def __init__(self, opener, opener2, linkmapper):
        manifest.manifestrevlog.__init__(self, opener)
        manifest2 = manifest.manifestrevlog(opener2)
        unionrevlog.__init__(
            self, opener, self.indexfile, manifest2, linkmapper
        )


class unionfilelog(filelog.filelog):
    def __init__(self, opener, path, opener2, linkmapper, repo):
        filelog.filelog.__init__(self, opener, path)
        filelog2 = filelog.filelog(opener2, path)
        self._revlog = unionrevlog(
            opener, self.indexfile, filelog2._revlog, linkmapper
        )
        self._repo = repo
        self.repotiprev = self._revlog.repotiprev
        self.revlog2 = self._revlog.revlog2

    def iscensored(self, rev):
        """Check if a revision is censored."""
        if rev <= self.repotiprev:
            return filelog.filelog.iscensored(self, rev)
        node = self.node(rev)
        return self.revlog2.iscensored(self.revlog2.rev(node))


class unionpeer(localrepo.localpeer):
    def canpush(self):
        return False


class unionrepository(object):
    """Represents the union of data in 2 repositories.

    Instances are not usable if constructed directly. Use ``instance()``
    or ``makeunionrepository()`` to create a usable instance.
    """

    def __init__(self, repo2, url):
        self.repo2 = repo2
        self._url = url

        self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')

    @localrepo.unfilteredpropertycache
    def changelog(self):
        return unionchangelog(self.svfs, self.repo2.svfs)

    @localrepo.unfilteredpropertycache
    def manifestlog(self):
        rootstore = unionmanifest(
            self.svfs, self.repo2.svfs, self.unfiltered()._clrev
        )
        return manifest.manifestlog(
            self.svfs, self, rootstore, self.narrowmatch()
        )

    def _clrev(self, rev2):
        """map from repo2 changelog rev to temporary rev in self.changelog"""
        node = self.repo2.changelog.node(rev2)
        return self.changelog.rev(node)

    def url(self):
        return self._url

    def file(self, f):
        return unionfilelog(
            self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
        )

    def close(self):
        self.repo2.close()

    def cancopy(self):
        return False

    def peer(self):
        return unionpeer(self)

    def getcwd(self):
        return encoding.getcwd()  # always outside the repo


def instance(ui, path, create, intents=None, createopts=None):
    if create:
        raise error.Abort(_(b'cannot create new union repository'))
    parentpath = ui.config(b"bundle", b"mainreporoot")
    if not parentpath:
        # try to find the correct path to the working directory repo
        parentpath = cmdutil.findrepo(encoding.getcwd())
        if parentpath is None:
            parentpath = b''
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = encoding.getcwd()
        if parentpath == cwd:
            parentpath = b''
        else:
            cwd = pathutil.normasprefix(cwd)
            if parentpath.startswith(cwd):
                parentpath = parentpath[len(cwd) :]
    if path.startswith(b'union:'):
        s = path.split(b":", 1)[1].split(b"+", 1)
        if len(s) == 1:
            repopath, repopath2 = parentpath, s[0]
        else:
            repopath, repopath2 = s
    else:
        repopath, repopath2 = parentpath, path

    return makeunionrepository(ui, repopath, repopath2)


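A hedged usage sketch of the ``union:`` scheme parsed above: on the command
line this corresponds to something like ``hg log -R union:repoA+repoB``;
programmatically it might look as follows, where 'repoA' and 'repoB' are
hypothetical paths to existing local repositories and the ui setup is
simplified.

    from mercurial import ui as uimod, unionrepo

    u = uimod.ui.load()
    repo = unionrepo.instance(u, b'union:repoA+repoB', create=False)
    print(len(repo.changelog))  # revisions from both repositories combined
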
def makeunionrepository(ui, repopath1, repopath2):
    """Make a union repository object from 2 local repo paths."""
    repo1 = localrepo.instance(ui, repopath1, create=False)
    repo2 = localrepo.instance(ui, repopath2, create=False)

    url = b'union:%s+%s' % (
        util.expandpath(repopath1),
        util.expandpath(repopath2),
    )

    class derivedunionrepository(unionrepository, repo1.__class__):
        pass

    repo = repo1
    repo.__class__ = derivedunionrepository
    unionrepository.__init__(repo1, repo2, url)

    return repo
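
The class swap above grafts unionrepository's methods onto an already
constructed local repository. A self-contained toy illustration of the same
trick (plain Python, names invented for the example):

    class Base:
        def who(self):
            return 'base'

    class Mixin:
        def who(self):
            return 'mixin'

    class Derived(Mixin, Base):
        pass

    obj = Base()
    obj.__class__ = Derived  # upgrade the existing instance in place
    assert obj.who() == 'mixin'  # Mixin now wins the MRO lookup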