index: replace insert(-1, e) method by append(e) method...
Martin von Zweigbergk
r38886:6104b203 default
@@ -1,622 +1,622 @@
# bundlerepo.py - repository class for viewing uncompressed bundles
#
# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Repository class for viewing uncompressed bundles.

This provides a read-only repository interface to bundles as if they
were part of the actual repository.
"""

from __future__ import absolute_import

import os
import shutil

from .i18n import _
from .node import nullid

from . import (
    bundle2,
    changegroup,
    changelog,
    cmdutil,
    discovery,
    error,
    exchange,
    filelog,
    localrepo,
    manifest,
    mdiff,
    node as nodemod,
    pathutil,
    phases,
    pycompat,
    revlog,
    util,
    vfs as vfsmod,
)

class bundlerevlog(revlog.revlog):
    def __init__(self, opener, indexfile, cgunpacker, linkmapper):
        # How it works:
        # To retrieve a revision, we need to know the offset of the revision in
        # the bundle (an unbundle object). We store this offset in the index
        # (start). The base of the delta is stored in the base field.
        #
        # To differentiate a rev in the bundle from a rev in the revlog, we
        # check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        revlog.revlog.__init__(self, opener, indexfile)
        self.bundle = cgunpacker
        n = len(self)
        self.repotiprev = n - 1
        self.bundlerevs = set()  # used by 'bundle()' revset expression
        for deltadata in cgunpacker.deltaiter():
            node, p1, p2, cs, deltabase, delta, flags = deltadata

            size = len(delta)
            start = cgunpacker.tell() - size

            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                self.bundlerevs.add(self.nodemap[node])
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _("unknown parent"))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)
            # start, size, full unc. size, base (unused), link, p1, p2, node
            e = (revlog.offset_type(start, flags), size, -1, baserev, link,
                 self.rev(p1), self.rev(p2), node)
-            self.index.insert(-1, e)
+            self.index.append(e)
            self.nodemap[node] = n
            self.bundlerevs.add(n)
            n += 1
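
    # Illustration (not part of this change): on a plain Python list,
    # insert(-1, e) lands *before* the last item, so the old call only
    # behaved like append() because the index historically carried a
    # sentinel entry at its end (an assumption based on revlog history).
    # append() states the intent directly:
    #
    #   >>> l = [1, 2, 3]
    #   >>> l.insert(-1, 9)   # before the last element
    #   >>> l
    #   [1, 2, 9, 3]
    #   >>> l.append(9)       # always at the end
    #   >>> l
    #   [1, 2, 9, 3, 9]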

    def _chunk(self, rev, df=None):
        # Warning: in case of bundle, the diff is against what we stored as
        # delta base, not against rev - 1
        # XXX: could use some caching
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        self.bundle.seek(self.start(rev))
        return self.bundle.read(self.length(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            # hot path for bundle
            revb = self.index[rev2][3]
            if revb == rev1:
                return self._chunk(rev2)
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return revlog.revlog.revdiff(self, rev1, rev2)

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = self.rev(node)

        if node == nullid:
            return ""

        rawtext = None
        chain = []
        iterrev = rev
        # reconstruct the revision if it is from a changegroup
        while iterrev > self.repotiprev:
            if self._cache and self._cache[1] == iterrev:
                rawtext = self._cache[2]
                break
            chain.append(iterrev)
            iterrev = self.index[iterrev][3]
        if rawtext is None:
            rawtext = self.baserevision(iterrev)

        while chain:
            delta = self._chunk(chain.pop())
            rawtext = mdiff.patches(rawtext, [delta])

        text, validatehash = self._processflags(rawtext, self.flags(rev),
                                                'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)
        self._cache = (node, rev, rawtext)
        return text
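
    # Sketch of the walk above: field 3 of an index entry is the delta-base
    # revision recorded in __init__, so a bundle revision is resolved by
    # following base pointers down into the on-disk revlog and patching
    # forward, e.g. for a chain r10 -> r9 -> r5 with repotiprev 5:
    #
    #   text = self.baserevision(5)                   # stored fulltext
    #   text = mdiff.patches(text, [self._chunk(9)])  # bundle deltas
    #   text = mdiff.patches(text, [self._chunk(10)])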

    def baserevision(self, nodeorrev):
        # Revlog subclasses may override 'revision' method to modify format of
        # content retrieved from revlog. To use bundlerevlog with such class one
        # needs to override 'baserevision' and make more specific call here.
        return revlog.revlog.revision(self, nodeorrev, raw=True)

    def addrevision(self, *args, **kwargs):
        raise NotImplementedError

    def addgroup(self, *args, **kwargs):
        raise NotImplementedError

    def strip(self, *args, **kwargs):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError

class bundlechangelog(bundlerevlog, changelog.changelog):
    def __init__(self, opener, cgunpacker):
        changelog.changelog.__init__(self, opener)
        linkmapper = lambda x: x
        bundlerevlog.__init__(self, opener, self.indexfile, cgunpacker,
                              linkmapper)

    def baserevision(self, nodeorrev):
        # Although changelog doesn't override 'revision' method, some extensions
        # may replace this class with another that does. Same story with
        # manifest and filelog classes.

        # This bypasses filtering on changelog.node() and rev() because we need
        # revision text of the bundle base even if it is hidden.
        oldfilter = self.filteredrevs
        try:
            self.filteredrevs = ()
            return changelog.changelog.revision(self, nodeorrev, raw=True)
        finally:
            self.filteredrevs = oldfilter

class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
    def __init__(self, opener, cgunpacker, linkmapper, dirlogstarts=None,
                 dir=''):
        manifest.manifestrevlog.__init__(self, opener, dir=dir)
        bundlerevlog.__init__(self, opener, self.indexfile, cgunpacker,
                              linkmapper)
        if dirlogstarts is None:
            dirlogstarts = {}
            if self.bundle.version == "03":
                dirlogstarts = _getfilestarts(self.bundle)
        self._dirlogstarts = dirlogstarts
        self._linkmapper = linkmapper

    def baserevision(self, nodeorrev):
        node = nodeorrev
        if isinstance(node, int):
            node = self.node(node)

        if node in self.fulltextcache:
            result = '%s' % self.fulltextcache[node]
        else:
            result = manifest.manifestrevlog.revision(self, nodeorrev, raw=True)
        return result

    def dirlog(self, d):
        if d in self._dirlogstarts:
            self.bundle.seek(self._dirlogstarts[d])
            return bundlemanifest(
                self.opener, self.bundle, self._linkmapper,
                self._dirlogstarts, dir=d)
        return super(bundlemanifest, self).dirlog(d)

class bundlefilelog(filelog.filelog):
    def __init__(self, opener, path, cgunpacker, linkmapper):
        filelog.filelog.__init__(self, opener, path)
        self._revlog = bundlerevlog(opener, self.indexfile,
                                    cgunpacker, linkmapper)

    def baserevision(self, nodeorrev):
        return filelog.filelog.revision(self, nodeorrev, raw=True)

class bundlepeer(localrepo.localpeer):
    def canpush(self):
        return False

class bundlephasecache(phases.phasecache):
    def __init__(self, *args, **kwargs):
        super(bundlephasecache, self).__init__(*args, **kwargs)
        if util.safehasattr(self, 'opener'):
            self.opener = vfsmod.readonlyvfs(self.opener)

    def write(self):
        raise NotImplementedError

    def _write(self, fp):
        raise NotImplementedError

    def _updateroots(self, phase, newroots, tr):
        self.phaseroots[phase] = newroots
        self.invalidate()
        self.dirty = True

def _getfilestarts(cgunpacker):
    filespos = {}
    for chunkdata in iter(cgunpacker.filelogheader, {}):
        fname = chunkdata['filename']
        filespos[fname] = cgunpacker.tell()
        for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
            pass
    return filespos
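
# Note: the two-argument iter(callable, sentinel) form above keeps calling
# filelogheader() until it returns the empty-dict sentinel, e.g.:
#
#   >>> nums = iter([1, 2, 0, 4])
#   >>> list(iter(lambda: next(nums), 0))   # stops at the sentinel 0
#   [1, 2]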

class bundlerepository(localrepo.localrepository):
    """A repository instance that is a union of a local repo and a bundle.

    Instances represent a read-only repository composed of a local repository
    with the contents of a bundle file applied. The repository instance is
    conceptually similar to the state of a repository after an
    ``hg unbundle`` operation. However, the contents of the bundle are never
    applied to the actual base repository.
    """
    def __init__(self, ui, repopath, bundlepath):
        self._tempparent = None
        try:
            localrepo.localrepository.__init__(self, ui, repopath)
        except error.RepoError:
            self._tempparent = pycompat.mkdtemp()
            localrepo.instance(ui, self._tempparent, 1)
            localrepo.localrepository.__init__(self, ui, self._tempparent)
        self.ui.setconfig('phases', 'publish', False, 'bundlerepo')

        if repopath:
            self._url = 'bundle:' + util.expandpath(repopath) + '+' + bundlepath
        else:
            self._url = 'bundle:' + bundlepath

        self.tempfile = None
        f = util.posixfile(bundlepath, "rb")
        bundle = exchange.readbundle(ui, f, bundlepath)

        if isinstance(bundle, bundle2.unbundle20):
            self._bundlefile = bundle
            self._cgunpacker = None

            cgpart = None
            for part in bundle.iterparts(seekable=True):
                if part.type == 'changegroup':
                    if cgpart:
                        raise NotImplementedError("can't process "
                                                  "multiple changegroups")
                    cgpart = part

                self._handlebundle2part(bundle, part)

            if not cgpart:
                raise error.Abort(_("No changegroups found"))

            # This is required to placate a later consumer, which expects
            # the payload offset to be at the beginning of the changegroup.
            # We need to do this after the iterparts() generator advances
            # because iterparts() will seek to end of payload after the
            # generator returns control to iterparts().
            cgpart.seek(0, os.SEEK_SET)

        elif isinstance(bundle, changegroup.cg1unpacker):
            if bundle.compressed():
                f = self._writetempbundle(bundle.read, '.hg10un',
                                          header='HG10UN')
                bundle = exchange.readbundle(ui, f, bundlepath, self.vfs)

            self._bundlefile = bundle
            self._cgunpacker = bundle
        else:
            raise error.Abort(_('bundle type %s cannot be read') %
                              type(bundle))

        # dict with the mapping 'filename' -> position in the changegroup.
        self._cgfilespos = {}

        self.firstnewrev = self.changelog.repotiprev + 1
        phases.retractboundary(self, None, phases.draft,
                               [ctx.node() for ctx in self[self.firstnewrev:]])

    def _handlebundle2part(self, bundle, part):
        if part.type != 'changegroup':
            return

        cgstream = part
        version = part.params.get('version', '01')
        legalcgvers = changegroup.supportedincomingversions(self)
        if version not in legalcgvers:
            msg = _('Unsupported changegroup version: %s')
            raise error.Abort(msg % version)
        if bundle.compressed():
            cgstream = self._writetempbundle(part.read, '.cg%sun' % version)

        self._cgunpacker = changegroup.getunbundler(version, cgstream, 'UN')

    def _writetempbundle(self, readfn, suffix, header=''):
        """Write a temporary file to disk
        """
        fdtemp, temp = self.vfs.mkstemp(prefix="hg-bundle-",
                                        suffix=suffix)
        self.tempfile = temp

        with os.fdopen(fdtemp, r'wb') as fptemp:
            fptemp.write(header)
            while True:
                chunk = readfn(2**18)
                if not chunk:
                    break
                fptemp.write(chunk)

        return self.vfs.open(self.tempfile, mode="rb")

    @localrepo.unfilteredpropertycache
    def _phasecache(self):
        return bundlephasecache(self, self._phasedefaults)

    @localrepo.unfilteredpropertycache
    def changelog(self):
        # consume the header if it exists
        self._cgunpacker.changelogheader()
        c = bundlechangelog(self.svfs, self._cgunpacker)
        self.manstart = self._cgunpacker.tell()
        return c

    def _constructmanifest(self):
        self._cgunpacker.seek(self.manstart)
        # consume the header if it exists
        self._cgunpacker.manifestheader()
        linkmapper = self.unfiltered().changelog.rev
        m = bundlemanifest(self.svfs, self._cgunpacker, linkmapper)
        self.filestart = self._cgunpacker.tell()
        return m

    def _consumemanifest(self):
        """Consumes the manifest portion of the bundle, setting filestart so the
        file portion can be read."""
        self._cgunpacker.seek(self.manstart)
        self._cgunpacker.manifestheader()
        for delta in self._cgunpacker.deltaiter():
            pass
        self.filestart = self._cgunpacker.tell()

    @localrepo.unfilteredpropertycache
    def manstart(self):
        self.changelog
        return self.manstart

    @localrepo.unfilteredpropertycache
    def filestart(self):
        self.manifestlog

        # If filestart was not set by self.manifestlog, that means the
        # manifestlog implementation did not consume the manifests from the
        # changegroup (ex: it might be consuming trees from a separate bundle2
        # part instead). So we need to manually consume it.
        if r'filestart' not in self.__dict__:
            self._consumemanifest()

        return self.filestart

    def url(self):
        return self._url

    def file(self, f):
        if not self._cgfilespos:
            self._cgunpacker.seek(self.filestart)
            self._cgfilespos = _getfilestarts(self._cgunpacker)

        if f in self._cgfilespos:
            self._cgunpacker.seek(self._cgfilespos[f])
            linkmapper = self.unfiltered().changelog.rev
            return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
        else:
            return super(bundlerepository, self).file(f)

    def close(self):
        """Close assigned bundle file immediately."""
        self._bundlefile.close()
        if self.tempfile is not None:
            self.vfs.unlink(self.tempfile)
        if self._tempparent:
            shutil.rmtree(self._tempparent, True)

    def cancopy(self):
        return False

    def peer(self):
        return bundlepeer(self)

    def getcwd(self):
        return pycompat.getcwd()  # always outside the repo

    # Check if parents exist in localrepo before setting
    def setparents(self, p1, p2=nullid):
        p1rev = self.changelog.rev(p1)
        p2rev = self.changelog.rev(p2)
        msg = _("setting parent to node %s that only exists in the bundle\n")
        if self.changelog.repotiprev < p1rev:
            self.ui.warn(msg % nodemod.hex(p1))
        if self.changelog.repotiprev < p2rev:
            self.ui.warn(msg % nodemod.hex(p2))
        return super(bundlerepository, self).setparents(p1, p2)

def instance(ui, path, create, intents=None):
    if create:
        raise error.Abort(_('cannot create new bundle repository'))
    # internal config: bundle.mainreporoot
    parentpath = ui.config("bundle", "mainreporoot")
    if not parentpath:
        # try to find the correct path to the working directory repo
        parentpath = cmdutil.findrepo(pycompat.getcwd())
        if parentpath is None:
            parentpath = ''
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = pycompat.getcwd()
        if parentpath == cwd:
            parentpath = ''
        else:
            cwd = pathutil.normasprefix(cwd)
            if parentpath.startswith(cwd):
                parentpath = parentpath[len(cwd):]
    u = util.url(path)
    path = u.localpath()
    if u.scheme == 'bundle':
        s = path.split("+", 1)
        if len(s) == 1:
            repopath, bundlename = parentpath, s[0]
        else:
            repopath, bundlename = s
    else:
        repopath, bundlename = parentpath, path
    return bundlerepository(ui, repopath, bundlename)
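
# Illustration of the path forms accepted above (hypothetical paths):
#
#   bundle:/srv/repo+/tmp/incoming.hg   repopath='/srv/repo', given bundle
#   bundle:/tmp/incoming.hg             repopath from bundle.mainreporoot,
#                                       else the repo found from cwd
#   /tmp/incoming.hg                    plain bundle path, repopath as above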

class bundletransactionmanager(object):
    def transaction(self):
        return None

    def close(self):
        raise NotImplementedError

    def release(self):
        raise NotImplementedError

def getremotechanges(ui, repo, peer, onlyheads=None, bundlename=None,
                     force=False):
    '''obtains a bundle of changes incoming from peer

    "onlyheads" restricts the returned changes to those reachable from the
    specified heads.
    "bundlename", if given, stores the bundle to this file path permanently;
    otherwise it's stored to a temp file and gets deleted again when you call
    the returned "cleanupfn".
    "force" indicates whether to proceed on unrelated repos.

    Returns a tuple (local, csets, cleanupfn):

    "local" is a local repo from which to obtain the actual incoming
    changesets; it is a bundlerepo for the obtained bundle when the
    original "peer" is remote.
    "csets" lists the incoming changeset node ids.
    "cleanupfn" must be called without arguments when you're done processing
    the changes; it closes both the original "peer" and the one returned
    here.
    '''
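    # Sketch of a typical caller (hypothetical, mirroring the docstring
    # above):
    #
    #   local, csets, cleanupfn = getremotechanges(ui, repo, otherpeer)
    #   try:
    #       for n in csets:
    #           ui.write(b'%s\n' % local[n].description())
    #   finally:
    #       cleanupfn()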
    tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads,
                                       force=force)
    common, incoming, rheads = tmp
    if not incoming:
        try:
            if bundlename:
                os.unlink(bundlename)
        except OSError:
            pass
        return repo, [], peer.close

    commonset = set(common)
    rheads = [x for x in rheads if x not in commonset]

    bundle = None
    bundlerepo = None
    localrepo = peer.local()
    if bundlename or not localrepo:
        # create a bundle (uncompressed if peer repo is not local)

        # developer config: devel.legacy.exchange
        legexc = ui.configlist('devel', 'legacy.exchange')
        forcebundle1 = 'bundle2' not in legexc and 'bundle1' in legexc
        canbundle2 = (not forcebundle1
                      and peer.capable('getbundle')
                      and peer.capable('bundle2'))
        if canbundle2:
            with peer.commandexecutor() as e:
                b2 = e.callcommand('getbundle', {
                    'source': 'incoming',
                    'common': common,
                    'heads': rheads,
                    'bundlecaps': exchange.caps20to10(repo, role='client'),
                    'cg': True,
                }).result()

                fname = bundle = changegroup.writechunks(ui,
                                                         b2._forwardchunks(),
                                                         bundlename)
        else:
            if peer.capable('getbundle'):
                with peer.commandexecutor() as e:
                    cg = e.callcommand('getbundle', {
                        'source': 'incoming',
                        'common': common,
                        'heads': rheads,
                    }).result()
            elif onlyheads is None and not peer.capable('changegroupsubset'):
                # compat with older servers when pulling all remote heads

                with peer.commandexecutor() as e:
                    cg = e.callcommand('changegroup', {
                        'nodes': incoming,
                        'source': 'incoming',
                    }).result()

                rheads = None
            else:
                with peer.commandexecutor() as e:
                    cg = e.callcommand('changegroupsubset', {
                        'bases': incoming,
                        'heads': rheads,
                        'source': 'incoming',
                    }).result()

            if localrepo:
                bundletype = "HG10BZ"
            else:
                bundletype = "HG10UN"
            fname = bundle = bundle2.writebundle(ui, cg, bundlename,
                                                 bundletype)
        # keep written bundle?
        if bundlename:
            bundle = None
        if not localrepo:
            # use the created uncompressed bundlerepo
            localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root,
                                                      fname)
            # this repo contains local and peer now, so filter out local again
            common = repo.heads()
    if localrepo:
        # Part of common may be remotely filtered
        # So use an unfiltered version
        # The discovery process probably need cleanup to avoid that
        localrepo = localrepo.unfiltered()

    csets = localrepo.changelog.findmissing(common, rheads)

    if bundlerepo:
        reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev:]]

        with peer.commandexecutor() as e:
            remotephases = e.callcommand('listkeys', {
                'namespace': 'phases',
            }).result()

        pullop = exchange.pulloperation(bundlerepo, peer, heads=reponodes)
        pullop.trmanager = bundletransactionmanager()
        exchange._pullapplyphases(pullop, remotephases)

    def cleanup():
        if bundlerepo:
            bundlerepo.close()
        if bundle:
            os.unlink(bundle)
        peer.close()

    return (localrepo, csets, cleanup)
@@ -1,797 +1,797 @@
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif

static const char *const versionerrortext = "Python minor version mismatch";

static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
        Py_ssize_t expected_size;

        if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
                return NULL;

        return _dict_new_presized(expected_size);
}

/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 */
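
/*
 * Illustration of the manifest entry layout assumed here (one entry per
 * line):
 *
 *   <filename>\0<40 hex nodeid characters>[<flags>]\n
 *
 * e.g. "foo/bar.py", a NUL byte, 40 hex digits, then optionally a flag
 * byte such as 'x' (executable) or 'l' (symlink) before the newline.
 */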
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
        PyObject *mfdict, *fdict;
        char *str, *start, *end;
        int len;

        if (!PyArg_ParseTuple(
                args, PY23("O!O!s#:parse_manifest", "O!O!y#:parse_manifest"),
                &PyDict_Type, &mfdict, &PyDict_Type, &fdict, &str, &len))
                goto quit;

        start = str;
        end = str + len;
        while (start < end) {
                PyObject *file = NULL, *node = NULL;
                PyObject *flags = NULL;
                char *zero = NULL, *newline = NULL;
                ptrdiff_t nlen;

                zero = memchr(start, '\0', end - start);
                if (!zero) {
                        PyErr_SetString(PyExc_ValueError,
                                        "manifest entry has no separator");
                        goto quit;
                }

                newline = memchr(zero + 1, '\n', end - (zero + 1));
                if (!newline) {
                        PyErr_SetString(PyExc_ValueError,
                                        "manifest contains trailing garbage");
                        goto quit;
                }

                file = PyBytes_FromStringAndSize(start, zero - start);

                if (!file)
                        goto bail;

                nlen = newline - zero - 1;

                node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
                if (!node)
                        goto bail;

                if (nlen > 40) {
                        flags = PyBytes_FromStringAndSize(zero + 41, nlen - 40);
                        if (!flags)
                                goto bail;

                        if (PyDict_SetItem(fdict, file, flags) == -1)
                                goto bail;
                }

                if (PyDict_SetItem(mfdict, file, node) == -1)
                        goto bail;

                start = newline + 1;

                Py_XDECREF(flags);
                Py_XDECREF(node);
                Py_XDECREF(file);
                continue;
        bail:
                Py_XDECREF(flags);
                Py_XDECREF(node);
                Py_XDECREF(file);
                goto quit;
        }

        Py_INCREF(Py_None);
        return Py_None;
quit:
        return NULL;
}

static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
                                                       int size, int mtime)
{
        dirstateTupleObject *t =
            PyObject_New(dirstateTupleObject, &dirstateTupleType);
        if (!t)
                return NULL;
        t->state = state;
        t->mode = mode;
        t->size = size;
        t->mtime = mtime;
        return t;
}

static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
                                    PyObject *kwds)
{
        /* We do all the initialization here and not a tp_init function because
         * dirstate_tuple is immutable. */
        dirstateTupleObject *t;
        char state;
        int size, mode, mtime;
        if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
                return NULL;

        t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
        if (!t)
                return NULL;
        t->state = state;
        t->mode = mode;
        t->size = size;
        t->mtime = mtime;

        return (PyObject *)t;
}

static void dirstate_tuple_dealloc(PyObject *o)
{
        PyObject_Del(o);
}

static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
        return 4;
}

static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
        dirstateTupleObject *t = (dirstateTupleObject *)o;
        switch (i) {
        case 0:
                return PyBytes_FromStringAndSize(&t->state, 1);
        case 1:
                return PyInt_FromLong(t->mode);
        case 2:
                return PyInt_FromLong(t->size);
        case 3:
                return PyInt_FromLong(t->mtime);
        default:
                PyErr_SetString(PyExc_IndexError, "index out of range");
                return NULL;
        }
}

static PySequenceMethods dirstate_tuple_sq = {
        dirstate_tuple_length, /* sq_length */
        0,                     /* sq_concat */
        0,                     /* sq_repeat */
        dirstate_tuple_item,   /* sq_item */
        0,                     /* sq_ass_item */
        0,                     /* sq_contains */
        0,                     /* sq_inplace_concat */
        0                      /* sq_inplace_repeat */
};

PyTypeObject dirstateTupleType = {
        PyVarObject_HEAD_INIT(NULL, 0)      /* header */
        "dirstate_tuple",                   /* tp_name */
        sizeof(dirstateTupleObject),        /* tp_basicsize */
        0,                                  /* tp_itemsize */
        (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
        0,                                  /* tp_print */
        0,                                  /* tp_getattr */
        0,                                  /* tp_setattr */
        0,                                  /* tp_compare */
        0,                                  /* tp_repr */
        0,                                  /* tp_as_number */
        &dirstate_tuple_sq,                 /* tp_as_sequence */
        0,                                  /* tp_as_mapping */
        0,                                  /* tp_hash */
        0,                                  /* tp_call */
        0,                                  /* tp_str */
        0,                                  /* tp_getattro */
        0,                                  /* tp_setattro */
        0,                                  /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT,                 /* tp_flags */
        "dirstate tuple",                   /* tp_doc */
        0,                                  /* tp_traverse */
        0,                                  /* tp_clear */
        0,                                  /* tp_richcompare */
        0,                                  /* tp_weaklistoffset */
        0,                                  /* tp_iter */
        0,                                  /* tp_iternext */
        0,                                  /* tp_methods */
        0,                                  /* tp_members */
        0,                                  /* tp_getset */
        0,                                  /* tp_base */
        0,                                  /* tp_dict */
        0,                                  /* tp_descr_get */
        0,                                  /* tp_descr_set */
        0,                                  /* tp_dictoffset */
        0,                                  /* tp_init */
        0,                                  /* tp_alloc */
        dirstate_tuple_new,                 /* tp_new */
};
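
/*
 * Sketch: from Python an instance behaves like a read-only 4-tuple
 * (the constructor name is assumed from the module registration):
 *
 *   t = parsers.dirstatetuple(b'n', 0o644, 12, 1500000000)
 *   state, mode, size, mtime = t
 */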

static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
        PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
        PyObject *fname = NULL, *cname = NULL, *entry = NULL;
        char state, *cur, *str, *cpos;
        int mode, size, mtime;
        unsigned int flen, len, pos = 40;
        int readlen;

        if (!PyArg_ParseTuple(
                args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
                &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen))
                goto quit;

        len = readlen;

        /* read parents */
        if (len < 40) {
                PyErr_SetString(PyExc_ValueError,
                                "too little data for parents");
                goto quit;
        }

        parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, 20, str + 20, 20);
        if (!parents)
                goto quit;

        /* read filenames */
        while (pos >= 40 && pos < len) {
                if (pos + 17 > len) {
                        PyErr_SetString(PyExc_ValueError,
                                        "overflow in dirstate");
                        goto quit;
                }
                cur = str + pos;
                /* unpack header */
                state = *cur;
                mode = getbe32(cur + 1);
                size = getbe32(cur + 5);
                mtime = getbe32(cur + 9);
                flen = getbe32(cur + 13);
                pos += 17;
                cur += 17;
                if (flen > len - pos) {
                        PyErr_SetString(PyExc_ValueError,
                                        "overflow in dirstate");
                        goto quit;
                }

                entry =
                    (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
                cpos = memchr(cur, 0, flen);
                if (cpos) {
                        fname = PyBytes_FromStringAndSize(cur, cpos - cur);
                        cname = PyBytes_FromStringAndSize(
                            cpos + 1, flen - (cpos - cur) - 1);
                        if (!fname || !cname ||
                            PyDict_SetItem(cmap, fname, cname) == -1 ||
                            PyDict_SetItem(dmap, fname, entry) == -1)
                                goto quit;
                        Py_DECREF(cname);
                } else {
                        fname = PyBytes_FromStringAndSize(cur, flen);
                        if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
                                goto quit;
                }
                Py_DECREF(fname);
                Py_DECREF(entry);
                fname = cname = entry = NULL;
                pos += flen;
        }

        ret = parents;
        Py_INCREF(ret);
quit:
        Py_XDECREF(fname);
        Py_XDECREF(cname);
        Py_XDECREF(entry);
        Py_XDECREF(parents);
        return ret;
}
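
/*
 * Illustration of the on-disk dirstate layout parsed above:
 *
 *   40 bytes: two 20-byte parent nodeids
 *   then, per entry:
 *     1 byte   state ('n', 'a', 'r' or 'm')
 *     4 bytes  mode  (big-endian, read via getbe32)
 *     4 bytes  size
 *     4 bytes  mtime
 *     4 bytes  flen, the length of the name field
 *     flen bytes: "name" or "name\0copysource"
 */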

/*
 * Build a set of non-normal and other parent entries from the dirstate dmap
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
{
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
		goto bail;

	nonnset = PySet_New(NULL);
	if (nonnset == NULL)
		goto bail;

	otherpset = PySet_New(NULL);
	if (otherpset == NULL)
		goto bail;

	pos = 0;
	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		t = (dirstateTupleObject *)v;

		if (t->state == 'n' && t->size == -2) {
			if (PySet_Add(otherpset, fname) == -1) {
				goto bail;
			}
		}

		if (t->state == 'n' && t->mtime != -1)
			continue;
		if (PySet_Add(nonnset, fname) == -1)
			goto bail;
	}

	result = Py_BuildValue("(OO)", nonnset, otherpset);
	if (result == NULL)
		goto bail;
	Py_DECREF(nonnset);
	Py_DECREF(otherpset);
	return result;
bail:
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	Py_XDECREF(result);
	return NULL;
}
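
/*
 * Summary of the loop above, no new behavior: an entry lands in the
 * "other parent" set when state == 'n' and size == -2, and in the
 * "non-normal" set unless it has state == 'n' with a known mtime
 * (mtime != -1), i.e. everything added, removed, merged or with an
 * unset timestamp.
 */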

/*
 * Efficiently pack a dirstate object into its on-disk format.
 */
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
	PyObject *packobj = NULL;
	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
	Py_ssize_t nbytes, pos, l;
	PyObject *k, *v = NULL, *pn;
	char *p, *s;
	int now;

	if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
	                      &PyDict_Type, &copymap, &pl, &now))
		return NULL;

	if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
		PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
		return NULL;
	}

	/* Figure out how much we need to allocate. */
	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		PyObject *c;
		if (!PyBytes_Check(k)) {
			PyErr_SetString(PyExc_TypeError, "expected string key");
			goto bail;
		}
		nbytes += PyBytes_GET_SIZE(k) + 17;
		c = PyDict_GetItem(copymap, k);
		if (c) {
			if (!PyBytes_Check(c)) {
				PyErr_SetString(PyExc_TypeError,
				                "expected string key");
				goto bail;
			}
			nbytes += PyBytes_GET_SIZE(c) + 1;
		}
	}

	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
	if (packobj == NULL)
		goto bail;

	p = PyBytes_AS_STRING(packobj);

	pn = PySequence_ITEM(pl, 0);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;
	pn = PySequence_ITEM(pl, 1);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;

	for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		dirstateTupleObject *tuple;
		char state;
		int mode, size, mtime;
		Py_ssize_t len, l;
		PyObject *o;
		char *t;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		tuple = (dirstateTupleObject *)v;

		state = tuple->state;
		mode = tuple->mode;
		size = tuple->size;
		mtime = tuple->mtime;
		if (state == 'n' && mtime == now) {
			/* See pure/parsers.py:pack_dirstate for why we do
			 * this. */
			mtime = -1;
			mtime_unset = (PyObject *)make_dirstate_tuple(
			    state, mode, size, mtime);
			if (!mtime_unset)
				goto bail;
			if (PyDict_SetItem(map, k, mtime_unset) == -1)
				goto bail;
			Py_DECREF(mtime_unset);
			mtime_unset = NULL;
		}
		*p++ = state;
		putbe32((uint32_t)mode, p);
		putbe32((uint32_t)size, p + 4);
		putbe32((uint32_t)mtime, p + 8);
		t = p + 12;
		p += 16;
		len = PyBytes_GET_SIZE(k);
		memcpy(p, PyBytes_AS_STRING(k), len);
		p += len;
		o = PyDict_GetItem(copymap, k);
		if (o) {
			*p++ = '\0';
			l = PyBytes_GET_SIZE(o);
			memcpy(p, PyBytes_AS_STRING(o), l);
			p += l;
			len += l + 1;
		}
		putbe32((uint32_t)len, t);
	}

	pos = p - PyBytes_AS_STRING(packobj);
	if (pos != nbytes) {
		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
		             (long)pos, (long)nbytes);
		goto bail;
	}

	return packobj;
bail:
	Py_XDECREF(mtime_unset);
	Py_XDECREF(packobj);
	Py_XDECREF(v);
	return NULL;
}
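
/*
 * Illustrative sketch, not part of this file: the allocation
 * arithmetic pack_dirstate() performs above. The packed form starts
 * with the two 20-byte parent hashes (40 bytes), and each entry then
 * costs a 17-byte header plus the filename, plus a NUL separator and
 * the copy source when the file was copied.
 */
#if 0 /* standalone demo, not built */
#include <stdio.h>
#include <string.h>

static size_t entry_size(const char *fname, const char *copysource)
{
	size_t n = 17 + strlen(fname); /* header + filename */
	if (copysource)
		n += 1 + strlen(copysource); /* NUL separator + source */
	return n;
}

int main(void)
{
	size_t nbytes = 40; /* p1 + p2, 20 bytes each */
	nbytes += entry_size("a.txt", NULL);
	nbytes += entry_size("b.txt", "a.txt");
	printf("packed dirstate: %zu bytes\n", nbytes); /* 40+22+28 = 90 */
	return 0;
}
#endif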

#define BUMPED_FIX 1
#define USING_SHA_256 2
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
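
/*
 * For reference, FM1_HEADER_SIZE works out to 19 bytes: 4 (total
 * marker size) + 8 (mtime as float64) + 2 (timezone) + 2 (flags) +
 * 1 (# successors) + 1 (# parents) + 1 (# metadata pairs), matching
 * the fields fm1readmarker() below reads in that order.
 */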

static PyObject *readshas(const char *source, unsigned char num,
                          Py_ssize_t hashwidth)
{
	int i;
	PyObject *list = PyTuple_New(num);
	if (list == NULL) {
		return NULL;
	}
	for (i = 0; i < num; i++) {
		PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
		if (hash == NULL) {
			Py_DECREF(list);
			return NULL;
		}
		PyTuple_SET_ITEM(list, i, hash);
		source += hashwidth;
	}
	return list;
}

static PyObject *fm1readmarker(const char *databegin, const char *dataend,
                               uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		parents = Py_None;
		Py_INCREF(parents);
	}

	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
	                    (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	Py_XDECREF(parents);
	return ret;
}
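
/*
 * Putting the parse above together, a v1 marker is laid out as:
 *
 *   19-byte header (see FM1_HEADER_SIZE above)
 *   hashwidth bytes: precursor node (20 bytes, or 32 with USING_SHA_256)
 *   nsuccs * hashwidth bytes: successor nodes
 *   nparents * hashwidth bytes: parent nodes (only when nparents is 1 or 2)
 *   nmetadata byte pairs: (key size, value size)
 *   the metadata keys and values themselves
 *
 * so the smallest possible SHA-1 marker, with no successors, parents
 * or metadata, is 19 + 20 = 39 bytes.
 */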

static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
{
	const char *data, *dataend;
	int datalen;
	Py_ssize_t offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
	                      &offset, &stop)) {
		return NULL;
	}
	dataend = data + datalen;
	data += offset;
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		Py_DECREF(record);
		if (error) {
			goto bail;
		}
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}

static char parsers_doc[] = "Efficient content parsing.";

PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
PyObject *parse_index2(PyObject *self, PyObject *args);

static PyMethodDef methods[] = {
	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
	{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
	 "create a set containing non-normal and other parent entries of given "
	 "dirstate\n"},
	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
	{"isasciistr", isasciistr, METH_VARARGS, "check if a string is ASCII\n"},
	{"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
	{"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
	{"dict_new_presized", dict_new_presized, METH_VARARGS,
	 "construct a dict with an expected size\n"},
	{"make_file_foldmap", make_file_foldmap, METH_VARARGS,
	 "make file foldmap\n"},
	{"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
	 "escape a UTF-8 byte string to JSON (fast path)\n"},
	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
	{"fm1readmarkers", fm1readmarkers, METH_VARARGS,
	 "parse v1 obsolete markers\n"},
	{NULL, NULL}};

void dirs_module_init(PyObject *mod);
void manifest_module_init(PyObject *mod);
void revlog_module_init(PyObject *mod);

-static const int version = 5;
+static const int version = 6;

static void module_init(PyObject *mod)
{
	PyModule_AddIntConstant(mod, "version", version);

	/* This module constant has two purposes. First, it lets us unit test
	 * the ImportError raised without hard-coding any error text. This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile. Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile. Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);
	revlog_module_init(mod);

	if (PyType_Ready(&dirstateTupleType) < 0)
		return;
	Py_INCREF(&dirstateTupleType);
	PyModule_AddObject(mod, "dirstatetuple",
	                   (PyObject *)&dirstateTupleType);
}

static int check_python_version(void)
{
	PyObject *sys = PyImport_ImportModule("sys"), *ver;
	long hexversion;
	if (!sys)
		return -1;
	ver = PyObject_GetAttrString(sys, "hexversion");
	Py_DECREF(sys);
	if (!ver)
		return -1;
	hexversion = PyInt_AsLong(ver);
	Py_DECREF(ver);
	/* sys.hexversion is a 32-bit number by default, so the -1 case
	 * should only occur in unusual circumstances (e.g. if sys.hexversion
	 * is manually set to an invalid value). */
	if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
		PyErr_Format(PyExc_ImportError,
		             "%s: The Mercurial extension "
		             "modules were compiled with Python " PY_VERSION
		             ", but "
		             "Mercurial is currently using Python with "
		             "sys.hexversion=%ld: "
		             "Python %s\n at: %s",
		             versionerrortext, hexversion, Py_GetVersion(),
		             Py_GetProgramFullPath());
		return -1;
	}
	return 0;
}
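
/*
 * Illustrative sketch, not part of this file: why the check above
 * shifts by 16. sys.hexversion packs major, minor, micro, release
 * level and serial into one 32-bit value, so dropping the low 16
 * bits compares only major.minor; any CPython 2.7.x reduces to
 * 0x0207.
 */
#if 0 /* standalone demo, not built */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	long py275 = 0x020705f0;  /* 2.7.5 final */
	long py2714 = 0x02070ef0; /* 2.7.14 final */
	long py364 = 0x030604f0;  /* 3.6.4 final */
	assert(py275 >> 16 == py2714 >> 16); /* same major.minor: accept */
	assert(py275 >> 16 != py364 >> 16);  /* different: reject */
	printf("2.7.x -> 0x%lx\n", (unsigned long)(py275 >> 16)); /* 0x207 */
	return 0;
}
#endif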

#ifdef IS_PY3K
static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
                                            parsers_doc, -1, methods};

PyMODINIT_FUNC PyInit_parsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return NULL;
	mod = PyModule_Create(&parsers_module);
	module_init(mod);
	return mod;
}
#else
PyMODINIT_FUNC initparsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return;
	mod = Py_InitModule3("parsers", methods, parsers_doc);
	module_init(mod);
}
#endif
@@ -1,2181 +1,2167
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AS_LONG PyLong_AS_LONG
#define PyInt_AsLong PyLong_AsLong
#endif

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;
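
/*
 * Illustrative sketch, not part of this file: the child-slot encoding
 * described above. Storing a leaf as -(rev + 2) keeps every leaf
 * strictly negative, so zero stays free to mean "empty" and positive
 * values can index further nodetree entries; even nullrev (-1)
 * encodes to -1, still distinguishable from empty.
 */
#if 0 /* standalone demo, not built */
#include <assert.h>

static int nt_leaf_encode(int rev) { return -(rev + 2); }
static int nt_leaf_decode(int v) { return -v - 2; }

int main(void)
{
	assert(nt_leaf_encode(0) == -2);  /* rev 0 cannot be stored as 0 */
	assert(nt_leaf_encode(-1) == -1); /* nullrev stays negative */
	assert(nt_leaf_decode(nt_leaf_encode(12345)) == 12345);
	return 0;
}
#endif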

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;         /* raw bytes of index */
	Py_buffer buf;          /* buffer of data */
	PyObject **cache;       /* cached tuples */
	const char **offsets;   /* populated on demand */
	Py_ssize_t raw_length;  /* original number of elements */
	Py_ssize_t length;      /* current number of elements */
	PyObject *added;        /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree *nt;           /* base-16 trie */
	unsigned ntlength;      /* # nodes in use */
	unsigned ntcapacity;    /* # nodes allocated */
	int ntdepth;            /* maximum depth of tree */
	int ntsplits;           /* # splits performed */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;
} indexObject;

static Py_ssize_t index_length(const indexObject *self)
{
	if (self->added == NULL)
		return self->length;
	return self->length + PyList_GET_SIZE(self->added);
}

static PyObject *nullentry;
static const char nullid[20];

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
#else
static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;

/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
			                             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}

static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
                                    int *ps, int maxrev)
{
	if (rev >= self->length - 1) {
		PyObject *tuple = PyList_GET_ITEM(self->added,
		                                  rev - self->length + 1);
		ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
		ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
	} else {
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If the index file is corrupted, ps[] may point to invalid
	 * revisions, so trusting them unconditionally risks a buffer
	 * overflow. */
	if (ps[0] > maxrev || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}


/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	if (pos == -1 || pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
	                      uncomp_len, base_rev, link_rev,
	                      parent_1, parent_2, c_node_id, 20);

	if (entry) {
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}
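
/*
 * Illustrative sketch, not part of this file: offset_flags as built
 * above packs the 6-byte offset and the 2-byte flags field into one
 * 64-bit value, so consumers recover them as offset = v >> 16 and
 * flags = v & 0xFFFF. The concrete numbers below are made up for the
 * demo.
 */
#if 0 /* standalone demo, not built */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t offset = 0x123456789aULL; /* any 48-bit chunk offset */
	uint64_t flags = 0x0001;           /* example flag bits */
	uint64_t offset_flags = (offset << 16) | flags;
	assert(offset_flags >> 16 == offset);
	assert((offset_flags & 0xFFFF) == flags);
	return 0;
}
#endif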

/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == length - 1 || pos == -1)
		return nullid;

	if (pos >= length)
		return NULL;

	if (pos >= self->length - 1) {
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyBytes_AS_STRING(str) : NULL;
	}

	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}

/*
 * Return the 20-byte SHA of the node corresponding to the given rev. The
 * rev is assumed to exist; if it does not, an exception is set.
 */
static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
{
	const char *node = index_node(self, pos);
	if (node == NULL) {
		PyErr_Format(PyExc_IndexError, "could not access rev %d",
		             (int)pos);
	}
	return node;
}

static int nt_insert(indexObject *self, const char *node, int rev);

static int node_check(PyObject *obj, char **node)
{
	Py_ssize_t nodelen;
	if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
		return -1;
	if (nodelen == 20)
		return 0;
	PyErr_SetString(PyExc_ValueError, "20-byte hash required");
	return -1;
}

-static PyObject *index_insert(indexObject *self, PyObject *args)
+static PyObject *index_append(indexObject *self, PyObject *obj)
{
-	PyObject *obj;
	char *node;
-	int index;
	Py_ssize_t len;

-	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
-		return NULL;
-
	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
		return NULL;

	len = index_length(self);

-	if (index < 0)
-		index += len;
-
-	if (index != len - 1) {
-		PyErr_SetString(PyExc_IndexError,
-		                "insert only supported at index -1");
-		return NULL;
-	}
-
	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	if (self->nt)
-		nt_insert(self, node, index);
+		nt_insert(self, node, len - 1);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
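
/*
 * Note on the rewrite above: the removed insert() only ever accepted
 * index -1 (that is, len - 1, the slot just before the nullid
 * sentinel), so the method is really an append. The appended tuple
 * still becomes rev len - 1, with len taken before the append, which
 * is why nt_insert() now receives len - 1 directly instead of a
 * caller-supplied index.
 */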

static void _index_clearcaches(indexObject *self)
{
	if (self->cache) {
		Py_ssize_t i;

		for (i = 0; i < self->raw_length; i++)
			Py_CLEAR(self->cache[i]);
		free(self->cache);
		self->cache = NULL;
	}
	if (self->offsets) {
		PyMem_Free(self->offsets);
		self->offsets = NULL;
	}
	free(self->nt);
	self->nt = NULL;
	Py_CLEAR(self->headrevs);
}

static PyObject *index_clearcaches(indexObject *self)
{
	_index_clearcaches(self);
	self->ntlength = self->ntcapacity = 0;
	self->ntdepth = self->ntsplits = 0;
	self->ntrev = -1;
	self->ntlookups = self->ntmisses = 0;
	Py_RETURN_NONE;
}

static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

#define istat(__n, __d) \
	do { \
		t = PyInt_FromSsize_t(self->__n); \
		if (!t) \
			goto bail; \
		if (PyDict_SetItemString(obj, __d, t) == -1) \
			goto bail; \
		Py_DECREF(t); \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		t = PyInt_FromSsize_t(len);
		if (!t)
			goto bail;
		if (PyDict_SetItemString(obj, "index entries added", t) == -1)
			goto bail;
		Py_DECREF(t);
	}

	if (self->raw_length != self->length - 1)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntcapacity, "node trie capacity");
	istat(ntdepth, "node trie depth");
	istat(ntlength, "node trie count");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	istat(ntsplits, "node trie splits");

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(t);
	return NULL;
}

/*
 * When we cache a list, we want to be sure the caller can't mutate
 * the cached copy.
 */
static PyObject *list_copy(PyObject *list)
{
	Py_ssize_t len = PyList_GET_SIZE(list);
	PyObject *newlist = PyList_New(len);
	Py_ssize_t i;

	if (newlist == NULL)
		return NULL;

	for (i = 0; i < len; i++) {
		PyObject *obj = PyList_GET_ITEM(list, i);
		Py_INCREF(obj);
		PyList_SET_ITEM(newlist, i, obj);
	}

	return newlist;
}

static int check_filter(PyObject *filter, Py_ssize_t arg)
{
	if (filter) {
		PyObject *arglist, *result;
		int isfiltered;

		arglist = Py_BuildValue("(n)", arg);
		if (!arglist) {
			return -1;
		}

		result = PyEval_CallObject(filter, arglist);
		Py_DECREF(arglist);
		if (!result) {
			return -1;
		}

		/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
		 * same as this function, so we can just return it directly. */
		isfiltered = PyObject_IsTrue(result);
		Py_DECREF(result);
		return isfiltered;
	} else {
		return 0;
	}
}

static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
                                    Py_ssize_t marker, char *phases)
{
	PyObject *iter = NULL;
	PyObject *iter_item = NULL;
	Py_ssize_t min_idx = index_length(self) + 1;
	long iter_item_long;

	if (PyList_GET_SIZE(list) != 0) {
		iter = PyObject_GetIter(list);
		if (iter == NULL)
			return -2;
		while ((iter_item = PyIter_Next(iter))) {
			iter_item_long = PyInt_AS_LONG(iter_item);
			Py_DECREF(iter_item);
			if (iter_item_long < min_idx)
				min_idx = iter_item_long;
			phases[iter_item_long] = marker;
		}
		Py_DECREF(iter);
	}

	return min_idx;
}

static inline void set_phase_from_parents(char *phases, int parent_1,
                                          int parent_2, Py_ssize_t i)
{
	if (parent_1 >= 0 && phases[parent_1] > phases[i])
		phases[i] = phases[parent_1];
	if (parent_2 >= 0 && phases[parent_2] > phases[i])
		phases[i] = phases[parent_2];
}
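
/*
 * Illustrative sketch, not part of this file: the helper above raises
 * a child's phase to the maximum of its parents' phases. The numeric
 * values (0 = public, 1 = draft, 2 = secret) are Mercurial's usual
 * ordering, assumed here for the demo.
 */
#if 0 /* standalone demo, not built */
#include <assert.h>

static void demo_set_phase_from_parents(char *phases, int p1, int p2, int i)
{
	if (p1 >= 0 && phases[p1] > phases[i])
		phases[i] = phases[p1];
	if (p2 >= 0 && phases[p2] > phases[i])
		phases[i] = phases[p2];
}

int main(void)
{
	char phases[3] = {0, 2, 0}; /* rev 0 public, rev 1 secret */
	demo_set_phase_from_parents(phases, 0, 1, 2); /* rev 2's parents */
	assert(phases[2] == 2); /* pulled up to secret */
	return 0;
}
#endif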

static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self) - 1;
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structures:
	 * tovisit: array of length len+1 (all revs + nullrev), filled up to
	 * lentovisit
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots,
	                      &PyBool_Type, &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal data structures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If a root is out of range, e.g. wdir(), it must be
		 * unreachable from heads, so we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root */
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == -1)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN)
			    && parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) & RS_REACHABLE)
			    && !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromLong(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
636
622
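/*
 * Illustrative example (derived from the code above, not in the
 * original source): for a linear graph 0 <- 1 <- 2 with roots=[0] and
 * heads=[2], the walk yields [0] when includepath is 0 (only the
 * reachable roots), and [0, 1, 2] when includepath is 1 (the roots
 * plus every rev on a path from them to the heads).
 */
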
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phasessize = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self) - 1;
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a list");
		goto done;
	}

	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots)+1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase-1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
		if (!PyList_Check(phaseroots)) {
			PyErr_SetString(PyExc_TypeError,
					"roots item must be a list");
			goto release;
		}
		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents,
					      (int)len - 1) < 0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1], i);
		}
	}
	/* Transform phase list to a python list */
	phasessize = PyInt_FromLong(len);
	if (phasessize == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		phase = phases[i];
		/* We only store the sets of phase for non public phase, the
		 * public phase is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromLong(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
	}
	ret = PyTuple_Pack(2, phasessize, phasessetlist);

release:
	Py_XDECREF(phasessize);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}

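/*
 * Shape of the result (a reading of the code above, hedged): a
 * 2-tuple (phasessize, phasessetlist) where phasessetlist is
 * [None, <set of draft revs>, <set of secret revs>, ...]; public revs
 * are never stored explicitly and are presumably recovered by the
 * Python caller as "everything not in the other sets".
 */
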
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(PyExc_TypeError,
				"filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self) - 1;
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
						"unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}

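/*
 * Illustrative example (not in the original source): a head is a rev
 * with no unfiltered child, so for 0 <- 1 <- 2 plus a branch 1 <- 3
 * the result is [2, 3]; an empty index yields [-1], the null rev.
 */
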
/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;

	if (rev >= self->length - 1) {
		PyObject *tuple = PyList_GET_ITEM(self->added,
						  rev - self->length + 1);
		return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
	} else {
		data = index_deref(self, rev);
		if (data == NULL) {
			return -2;
		}

		return getbe32(data + 16);
	}
}

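/*
 * Background note (an assumption about the v1 on-disk index format):
 * each 64-byte entry stores the delta-base revision as a big-endian
 * 32-bit integer at byte offset 16, which is what getbe32(data + 16)
 * reads above; pending entries still in self->added live as Python
 * tuples, with the base at tuple index 3.
 */
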
static PyObject *index_deltachain(indexObject *self, PyObject *args)
{
	int rev, generaldelta;
	PyObject *stoparg;
	int stoprev, iterrev, baserev = -1;
	int stopped;
	PyObject *chain = NULL, *result = NULL;
	const Py_ssize_t length = index_length(self);

	if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
		return NULL;
	}

	if (PyInt_Check(stoparg)) {
		stoprev = (int)PyInt_AsLong(stoparg);
		if (stoprev == -1 && PyErr_Occurred()) {
			return NULL;
		}
	} else if (stoparg == Py_None) {
		stoprev = -2;
	} else {
		PyErr_SetString(PyExc_ValueError,
				"stoprev must be integer or None");
		return NULL;
	}

	if (rev < 0 || rev >= length - 1) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}

	chain = PyList_New(0);
	if (chain == NULL) {
		return NULL;
	}

	baserev = index_baserev(self, rev);

	/* This should never happen. */
	if (baserev <= -2) {
		/* Error should be set by index_deref() */
		assert(PyErr_Occurred());
		goto bail;
	}

	iterrev = rev;

	while (iterrev != baserev && iterrev != stoprev) {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		if (generaldelta) {
			iterrev = baserev;
		} else {
			iterrev--;
		}

		if (iterrev < 0) {
			break;
		}

		if (iterrev >= length - 1) {
			PyErr_SetString(PyExc_IndexError,
					"revision outside index");
			goto bail;
		}

		baserev = index_baserev(self, iterrev);

		/* This should never happen. */
		if (baserev <= -2) {
			/* Error should be set by index_deref() */
			assert(PyErr_Occurred());
			goto bail;
		}
	}

	if (iterrev == stoprev) {
		stopped = 1;
	} else {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		stopped = 0;
	}

	if (PyList_Reverse(chain)) {
		goto bail;
	}

	result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
	Py_DECREF(chain);
	return result;

bail:
	Py_DECREF(chain);
	return NULL;
}

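/*
 * Worked example (not in the original source): with generaldelta, if
 * base(5) == 3 and base(3) == 3, then deltachain(5, None, 1) walks
 * 5 -> 3, stops at the self-based rev, and returns ([3, 5], False);
 * calling deltachain(5, 3, 1) instead returns ([5], True).
 */
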
static inline int nt_level(const char *node, Py_ssize_t level)
{
	int v = node[level>>1];
	if (!(level & 1))
		v >>= 4;
	return v & 0xf;
}

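/*
 * Example (not in the original source): nt_level extracts one hex
 * digit per level, so for a node whose first byte is 0xab,
 * nt_level(node, 0) == 0xa (high nybble) and nt_level(node, 1) == 0xb
 * (low nybble).
 */
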
/*
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
		   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetree *n = &self->nt[off];
		int v = n->children[k];

		if (v < 0) {
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}

static int nt_new(indexObject *self)
{
	if (self->ntlength == self->ntcapacity) {
		nodetree *newnt;
		if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
			PyErr_SetString(PyExc_MemoryError,
					"overflow in nt_new");
			return -1;
		}
		/* grow through a temporary so the old block is neither lost
		 * nor leaked if realloc fails */
		newnt = realloc(self->nt,
				self->ntcapacity * 2 * sizeof(nodetree));
		if (newnt == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->nt = newnt;
		self->ntcapacity *= 2;
		memset(&self->nt[self->ntlength], 0,
		       sizeof(nodetree) * (self->ntcapacity - self->ntlength));
	}
	return self->ntlength++;
}

static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			const char *oldnode = index_node_existing(self, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}

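/*
 * Encoding note (derived from the code above): children[k] == 0 means
 * "empty slot", a positive value is the offset of a child nodetree,
 * and a negative value is a leaf storing rev as -(rev + 2). Hence
 * rev -1 (nullid) encodes as -1, and rev -2 encodes as 0, i.e. as an
 * empty slot, which is what nt_delete_node below relies on.
 */
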
static int nt_delete_node(indexObject *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set */
	return nt_insert(self, node, -2);
}

static int nt_init(indexObject *self)
{
	if (self->nt == NULL) {
		if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
			PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
			return -1;
		}
		self->ntcapacity = self->raw_length < 4
			? 4 : (int)self->raw_length / 2;

		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
		if (self->nt == NULL) {
			PyErr_NoMemory();
			return -1;
		}
		self->ntlength = 1;
		self->ntrev = (int)index_length(self) - 1;
		self->ntlookups = 1;
		self->ntmisses = 0;
		if (nt_insert(self, nullid, -1) == -1) {
			free(self->nt);
			self->nt = NULL;
			return -1;
		}
	}
	return 0;
}

/*
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self,
			   const char *node, Py_ssize_t nodelen)
{
	int rev;

	if (nt_init(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(self, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(self, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(self, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}

static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
				"could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}

static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	int rev;

	if (PyInt_Check(value))
		return index_get(self, PyInt_AS_LONG(value));

	if (node_check(value, &node) == -1)
		return NULL;
	rev = index_find_node(self, node, 20);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

/*
 * Fully populate the radix tree.
 */
static int nt_populate(indexObject *self)
{
	int rev;
	if (self->ntrev > 0) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -1;
			if (nt_insert(self, n, rev) == -1)
				return -1;
		}
		self->ntrev = -1;
	}
	return 0;
}

static int nt_partialmatch(indexObject *self, const char *node,
			   Py_ssize_t nodelen)
{
	if (nt_init(self) == -1)
		return -3;
	if (nt_populate(self) == -1)
		return -3;

	return nt_find(self, node, nodelen, 1);
}

/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(indexObject *self, const char *node)
{
	int level, off;

	if (nt_init(self) == -1)
		return -3;
	if (nt_populate(self) == -1)
		return -3;

	for (level = off = 0; level < 40; level++) {
		int k, v;
		nodetree *n = &self->nt[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, 20) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e. the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}

static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		/* fall through */
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}

static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node) == -1)
		return NULL;

	self->ntlookups++;
	length = nt_shortest(self, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static PyObject *index_m_get(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int rev;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node, 20);
	if (rev == -3)
		return NULL;
	if (rev == -2)
		Py_RETURN_NONE;
	return PyInt_FromLong(rev);
}

static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev = PyInt_AS_LONG(value);
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(value, &node) == -1)
		return -1;

	switch (index_find_node(self, node, 20)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}

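/*
 * Usage note (derived from the code above): "x in index" accepts
 * either a rev number or a 20-byte node; "-1 in index" is always true
 * (the null revision), while an unknown node yields False and only a
 * lookup error raises an exception.
 */
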
typedef uint64_t bitmask;

/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
				     int revcount)
{
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				PyObject *obj = PyInt_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				}
				else if (sp != sv)
					seen[p] |= sv;
			} else {
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}

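/*
 * Worked example (not in the original source): with two input revs,
 * allseen is binary 11 and poison is 100. Each input starts with its
 * own bit in seen[]; bits merge while walking down from maxrev, and
 * the first rev whose mask reaches 11 becomes a GCA candidate. Its
 * ancestors then inherit the poison bit so deeper common ancestors
 * are not reported as well.
 */
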
/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
			     "bitset size (%ld) > capacity (%ld)",
			     (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	interesting = calloc(sizeof(*interesting), 1 << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			}
			else if (dv == dp - 1) {
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}

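/*
 * Reading note (an interpretation of the loop above): depth[] grows
 * as the walk moves toward the root, so the inputs whose seen[] bit
 * survives into the final bitmask are exactly the candidates with the
 * longest root path, which is what "deepest" means here.
 */
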
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self) - 1;

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
					"arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError,
					"index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		}
		else repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
				     "bitset size (%d) > capacity (%d)",
				     revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}

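/*
 * Example of the "cheesy bloom filter" above (not in the original
 * source): revs 1 and 65 hash to the same bit since 65 & 0x3f == 1,
 * so the second one triggers the exact linear scan and is kept; a
 * true duplicate such as passing rev 1 twice is skipped via the
 * duplicate label.
 */
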
/*
 * Given a (possibly overlapping) set of revs, return the greatest
 * common ancestors: those with the longest path to the root.
 */
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
	PyObject *ret;
	PyObject *gca = index_commonancestorsheads(self, args);
	if (gca == NULL)
		return NULL;

	if (PyList_GET_SIZE(gca) <= 1) {
		return gca;
	}

	ret = find_deepest(self, gca);
	Py_DECREF(gca);
	return ret;
}

/*
 * Invalidate any trie entries introduced by added revs.
 */
static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len = PyList_GET_SIZE(self->added);

	for (i = start; i < len; i++) {
		PyObject *tuple = PyList_GET_ITEM(self->added, i);
		PyObject *node = PyTuple_GET_ITEM(tuple, 7);

		nt_delete_node(self, PyBytes_AS_STRING(node));
	}

	if (start == 0)
		Py_CLEAR(self->added);
}

1805 /*
1791 /*
1806 * Delete a numeric range of revs, which must be at the end of the
1792 * Delete a numeric range of revs, which must be at the end of the
1807 * range, but exclude the sentinel nullid entry.
1793 * range, but exclude the sentinel nullid entry.
1808 */
1794 */
1809 static int index_slice_del(indexObject *self, PyObject *item)
1795 static int index_slice_del(indexObject *self, PyObject *item)
1810 {
1796 {
1811 Py_ssize_t start, stop, step, slicelength;
1797 Py_ssize_t start, stop, step, slicelength;
1812 Py_ssize_t length = index_length(self);
1798 Py_ssize_t length = index_length(self);
1813 int ret = 0;
1799 int ret = 0;
1814
1800
1815 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1801 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1816 #ifdef IS_PY3K
1802 #ifdef IS_PY3K
1817 if (PySlice_GetIndicesEx(item, length,
1803 if (PySlice_GetIndicesEx(item, length,
1818 #else
1804 #else
1819 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1805 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1820 #endif
1806 #endif
1821 &start, &stop, &step, &slicelength) < 0)
1807 &start, &stop, &step, &slicelength) < 0)
1822 return -1;
1808 return -1;
1823
1809
1824 if (slicelength <= 0)
1810 if (slicelength <= 0)
1825 return 0;
1811 return 0;
1826
1812
1827 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1813 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1828 stop = start;
1814 stop = start;
1829
1815
1830 if (step < 0) {
1816 if (step < 0) {
1831 stop = start + 1;
1817 stop = start + 1;
1832 start = stop + step*(slicelength - 1) - 1;
1818 start = stop + step*(slicelength - 1) - 1;
1833 step = -step;
1819 step = -step;
1834 }
1820 }
1835
1821
1836 if (step != 1) {
1822 if (step != 1) {
1837 PyErr_SetString(PyExc_ValueError,
1823 PyErr_SetString(PyExc_ValueError,
1838 "revlog index delete requires step size of 1");
1824 "revlog index delete requires step size of 1");
1839 return -1;
1825 return -1;
1840 }
1826 }
1841
1827
1842 if (stop != length - 1) {
1828 if (stop != length - 1) {
1843 PyErr_SetString(PyExc_IndexError,
1829 PyErr_SetString(PyExc_IndexError,
1844 "revlog index deletion indices are invalid");
1830 "revlog index deletion indices are invalid");
1845 return -1;
1831 return -1;
1846 }
1832 }
1847
1833
1848 if (start < self->length - 1) {
1834 if (start < self->length - 1) {
1849 if (self->nt) {
1835 if (self->nt) {
1850 Py_ssize_t i;
1836 Py_ssize_t i;
1851
1837
1852 for (i = start + 1; i < self->length - 1; i++) {
1838 for (i = start + 1; i < self->length - 1; i++) {
1853 const char *node = index_node_existing(self, i);
1839 const char *node = index_node_existing(self, i);
1854 if (node == NULL)
1840 if (node == NULL)
1855 return -1;
1841 return -1;
1856
1842
1857 nt_delete_node(self, node);
1843 nt_delete_node(self, node);
1858 }
1844 }
1859 if (self->added)
1845 if (self->added)
1860 nt_invalidate_added(self, 0);
1846 nt_invalidate_added(self, 0);
1861 if (self->ntrev > start)
1847 if (self->ntrev > start)
1862 self->ntrev = (int)start;
1848 self->ntrev = (int)start;
1863 }
1849 }
1864 self->length = start + 1;
1850 self->length = start + 1;
1865 if (start < self->raw_length) {
1851 if (start < self->raw_length) {
1866 if (self->cache) {
1852 if (self->cache) {
1867 Py_ssize_t i;
1853 Py_ssize_t i;
1868 for (i = start; i < self->raw_length; i++)
1854 for (i = start; i < self->raw_length; i++)
1869 Py_CLEAR(self->cache[i]);
1855 Py_CLEAR(self->cache[i]);
1870 }
1856 }
1871 self->raw_length = start;
1857 self->raw_length = start;
1872 }
1858 }
1873 goto done;
1859 goto done;
1874 }
1860 }
1875
1861
1876 if (self->nt) {
1862 if (self->nt) {
1877 nt_invalidate_added(self, start - self->length + 1);
1863 nt_invalidate_added(self, start - self->length + 1);
1878 if (self->ntrev > start)
1864 if (self->ntrev > start)
1879 self->ntrev = (int)start;
1865 self->ntrev = (int)start;
1880 }
1866 }
1881 if (self->added)
1867 if (self->added)
1882 ret = PyList_SetSlice(self->added, start - self->length + 1,
1868 ret = PyList_SetSlice(self->added, start - self->length + 1,
1883 PyList_GET_SIZE(self->added), NULL);
1869 PyList_GET_SIZE(self->added), NULL);
1884 done:
1870 done:
1885 Py_CLEAR(self->headrevs);
1871 Py_CLEAR(self->headrevs);
1886 return ret;
1872 return ret;
1887 }
1873 }
1888
1874
1889 /*
1875 /*
1890 * Supported ops:
1876 * Supported ops:
1891 *
1877 *
1892 * slice deletion
1878 * slice deletion
1893 * string assignment (extend node->rev mapping)
1879 * string assignment (extend node->rev mapping)
1894 * string deletion (shrink node->rev mapping)
1880 * string deletion (shrink node->rev mapping)
1895 */
1881 */
1896 static int index_assign_subscript(indexObject *self, PyObject *item,
1882 static int index_assign_subscript(indexObject *self, PyObject *item,
1897 PyObject *value)
1883 PyObject *value)
1898 {
1884 {
1899 char *node;
1885 char *node;
1900 long rev;
1886 long rev;
1901
1887
1902 if (PySlice_Check(item) && value == NULL)
1888 if (PySlice_Check(item) && value == NULL)
1903 return index_slice_del(self, item);
1889 return index_slice_del(self, item);
1904
1890
1905 if (node_check(item, &node) == -1)
1891 if (node_check(item, &node) == -1)
1906 return -1;
1892 return -1;
1907
1893
1908 if (value == NULL)
1894 if (value == NULL)
1909 return self->nt ? nt_delete_node(self, node) : 0;
1895 return self->nt ? nt_delete_node(self, node) : 0;
1910 rev = PyInt_AsLong(value);
1896 rev = PyInt_AsLong(value);
1911 if (rev > INT_MAX || rev < 0) {
1897 if (rev > INT_MAX || rev < 0) {
1912 if (!PyErr_Occurred())
1898 if (!PyErr_Occurred())
1913 PyErr_SetString(PyExc_ValueError, "rev out of range");
1899 PyErr_SetString(PyExc_ValueError, "rev out of range");
1914 return -1;
1900 return -1;
1915 }
1901 }
1916
1902
1917 if (nt_init(self) == -1)
1903 if (nt_init(self) == -1)
1918 return -1;
1904 return -1;
1919 return nt_insert(self, node, (int)rev);
1905 return nt_insert(self, node, (int)rev);
1920 }
1906 }
1921
1907
1922 /*
1908 /*
1923 * Find all RevlogNG entries in an index that has inline data. Update
1909 * Find all RevlogNG entries in an index that has inline data. Update
1924 * the optional "offsets" table with those entries.
1910 * the optional "offsets" table with those entries.
1925 */
1911 */
1926 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1912 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1927 {
1913 {
1928 const char *data = (const char *)self->buf.buf;
1914 const char *data = (const char *)self->buf.buf;
1929 Py_ssize_t pos = 0;
1915 Py_ssize_t pos = 0;
1930 Py_ssize_t end = self->buf.len;
1916 Py_ssize_t end = self->buf.len;
1931 long incr = v1_hdrsize;
1917 long incr = v1_hdrsize;
1932 Py_ssize_t len = 0;
1918 Py_ssize_t len = 0;
1933
1919
1934 while (pos + v1_hdrsize <= end && pos >= 0) {
1920 while (pos + v1_hdrsize <= end && pos >= 0) {
1935 uint32_t comp_len;
1921 uint32_t comp_len;
1936 /* 3rd element of header is length of compressed inline data */
1922 /* 3rd element of header is length of compressed inline data */
1937 comp_len = getbe32(data + pos + 8);
1923 comp_len = getbe32(data + pos + 8);
1938 incr = v1_hdrsize + comp_len;
1924 incr = v1_hdrsize + comp_len;
1939 if (offsets)
1925 if (offsets)
1940 offsets[len] = data + pos;
1926 offsets[len] = data + pos;
1941 len++;
1927 len++;
1942 pos += incr;
1928 pos += incr;
1943 }
1929 }
1944
1930
1945 if (pos != end) {
1931 if (pos != end) {
1946 if (!PyErr_Occurred())
1932 if (!PyErr_Occurred())
1947 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1933 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1948 return -1;
1934 return -1;
1949 }
1935 }
1950
1936
1951 return len;
1937 return len;
1952 }
1938 }
1953
1939
1954 static int index_init(indexObject *self, PyObject *args)
1940 static int index_init(indexObject *self, PyObject *args)
1955 {
1941 {
1956 PyObject *data_obj, *inlined_obj;
1942 PyObject *data_obj, *inlined_obj;
1957 Py_ssize_t size;
1943 Py_ssize_t size;
1958
1944
1959 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1945 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1960 self->raw_length = 0;
1946 self->raw_length = 0;
1961 self->added = NULL;
1947 self->added = NULL;
1962 self->cache = NULL;
1948 self->cache = NULL;
1963 self->data = NULL;
1949 self->data = NULL;
1964 memset(&self->buf, 0, sizeof(self->buf));
1950 memset(&self->buf, 0, sizeof(self->buf));
1965 self->headrevs = NULL;
1951 self->headrevs = NULL;
1966 self->filteredrevs = Py_None;
1952 self->filteredrevs = Py_None;
1967 Py_INCREF(Py_None);
1953 Py_INCREF(Py_None);
1968 self->nt = NULL;
1954 self->nt = NULL;
1969 self->offsets = NULL;
1955 self->offsets = NULL;
1970
1956
1971 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1957 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1972 return -1;
1958 return -1;
1973 if (!PyObject_CheckBuffer(data_obj)) {
1959 if (!PyObject_CheckBuffer(data_obj)) {
1974 PyErr_SetString(PyExc_TypeError,
1960 PyErr_SetString(PyExc_TypeError,
1975 "data does not support buffer interface");
1961 "data does not support buffer interface");
1976 return -1;
1962 return -1;
1977 }
1963 }
1978
1964
1979 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1965 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1980 return -1;
1966 return -1;
1981 size = self->buf.len;
1967 size = self->buf.len;
1982
1968
1983 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1969 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1984 self->data = data_obj;
1970 self->data = data_obj;
1985
1971
1986 self->ntlength = self->ntcapacity = 0;
1972 self->ntlength = self->ntcapacity = 0;
1987 self->ntdepth = self->ntsplits = 0;
1973 self->ntdepth = self->ntsplits = 0;
1988 self->ntlookups = self->ntmisses = 0;
1974 self->ntlookups = self->ntmisses = 0;
1989 self->ntrev = -1;
1975 self->ntrev = -1;
1990 Py_INCREF(self->data);
1976 Py_INCREF(self->data);
1991
1977
1992 if (self->inlined) {
1978 if (self->inlined) {
1993 Py_ssize_t len = inline_scan(self, NULL);
1979 Py_ssize_t len = inline_scan(self, NULL);
1994 if (len == -1)
1980 if (len == -1)
1995 goto bail;
1981 goto bail;
1996 self->raw_length = len;
1982 self->raw_length = len;
1997 self->length = len + 1;
1983 self->length = len + 1;
1998 } else {
1984 } else {
1999 if (size % v1_hdrsize) {
1985 if (size % v1_hdrsize) {
2000 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1986 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2001 goto bail;
1987 goto bail;
2002 }
1988 }
2003 self->raw_length = size / v1_hdrsize;
1989 self->raw_length = size / v1_hdrsize;
2004 self->length = self->raw_length + 1;
1990 self->length = self->raw_length + 1;
2005 }
1991 }
2006
1992
2007 return 0;
1993 return 0;
2008 bail:
1994 bail:
2009 return -1;
1995 return -1;
2010 }
1996 }
2011
1997
2012 static PyObject *index_nodemap(indexObject *self)
1998 static PyObject *index_nodemap(indexObject *self)
2013 {
1999 {
2014 Py_INCREF(self);
2000 Py_INCREF(self);
2015 return (PyObject *)self;
2001 return (PyObject *)self;
2016 }
2002 }
2017
2003
2018 static void index_dealloc(indexObject *self)
2004 static void index_dealloc(indexObject *self)
2019 {
2005 {
2020 _index_clearcaches(self);
2006 _index_clearcaches(self);
2021 Py_XDECREF(self->filteredrevs);
2007 Py_XDECREF(self->filteredrevs);
2022 if (self->buf.buf) {
2008 if (self->buf.buf) {
2023 PyBuffer_Release(&self->buf);
2009 PyBuffer_Release(&self->buf);
2024 memset(&self->buf, 0, sizeof(self->buf));
2010 memset(&self->buf, 0, sizeof(self->buf));
2025 }
2011 }
2026 Py_XDECREF(self->data);
2012 Py_XDECREF(self->data);
2027 Py_XDECREF(self->added);
2013 Py_XDECREF(self->added);
2028 PyObject_Del(self);
2014 PyObject_Del(self);
2029 }
2015 }
2030
2016
2031 static PySequenceMethods index_sequence_methods = {
2017 static PySequenceMethods index_sequence_methods = {
2032 (lenfunc)index_length, /* sq_length */
2018 (lenfunc)index_length, /* sq_length */
2033 0, /* sq_concat */
2019 0, /* sq_concat */
2034 0, /* sq_repeat */
2020 0, /* sq_repeat */
2035 (ssizeargfunc)index_get, /* sq_item */
2021 (ssizeargfunc)index_get, /* sq_item */
2036 0, /* sq_slice */
2022 0, /* sq_slice */
2037 0, /* sq_ass_item */
2023 0, /* sq_ass_item */
2038 0, /* sq_ass_slice */
2024 0, /* sq_ass_slice */
2039 (objobjproc)index_contains, /* sq_contains */
2025 (objobjproc)index_contains, /* sq_contains */
2040 };
2026 };
2041
2027
2042 static PyMappingMethods index_mapping_methods = {
2028 static PyMappingMethods index_mapping_methods = {
2043 (lenfunc)index_length, /* mp_length */
2029 (lenfunc)index_length, /* mp_length */
2044 (binaryfunc)index_getitem, /* mp_subscript */
2030 (binaryfunc)index_getitem, /* mp_subscript */
2045 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2031 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2046 };
2032 };
2047
2033
2048 static PyMethodDef index_methods[] = {
2034 static PyMethodDef index_methods[] = {
2049 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2035 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2050 "return the gca set of the given revs"},
2036 "return the gca set of the given revs"},
2051 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2037 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2052 METH_VARARGS,
2038 METH_VARARGS,
2053 "return the heads of the common ancestors of the given revs"},
2039 "return the heads of the common ancestors of the given revs"},
2054 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2055 "clear the index caches"},
2041 "clear the index caches"},
2056 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2057 "get an index entry"},
2043 "get an index entry"},
2058 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2044 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2059 METH_VARARGS, "compute phases"},
2045 METH_VARARGS, "compute phases"},
2060 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2046 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2061 "reachableroots"},
2047 "reachableroots"},
2062 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2048 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2063 "get head revisions"}, /* Can do filtering since 3.2 */
2049 "get head revisions"}, /* Can do filtering since 3.2 */
2064 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2050 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2065 "get filtered head revisions"}, /* Can always do filtering */
2051 "get filtered head revisions"}, /* Can always do filtering */
2066 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2052 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2067 "determine revisions with deltas to reconstruct fulltext"},
2053 "determine revisions with deltas to reconstruct fulltext"},
2068 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2054 {"append", (PyCFunction)index_append, METH_O,
2069 "insert an index entry"},
2055 "append an index entry"},
2070 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2056 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2071 "match a potentially ambiguous node ID"},
2057 "match a potentially ambiguous node ID"},
2072 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2058 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2073 "find length of shortest hex nodeid of a binary ID"},
2059 "find length of shortest hex nodeid of a binary ID"},
2074 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2060 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2075 "stats for the index"},
2061 "stats for the index"},
2076 {NULL} /* Sentinel */
2062 {NULL} /* Sentinel */
2077 };
2063 };
2078
2064
2079 static PyGetSetDef index_getset[] = {
2065 static PyGetSetDef index_getset[] = {
2080 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2066 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2081 {NULL} /* Sentinel */
2067 {NULL} /* Sentinel */
2082 };
2068 };
2083
2069
2084 static PyTypeObject indexType = {
2070 static PyTypeObject indexType = {
2085 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2071 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2086 "parsers.index", /* tp_name */
2072 "parsers.index", /* tp_name */
2087 sizeof(indexObject), /* tp_basicsize */
2073 sizeof(indexObject), /* tp_basicsize */
2088 0, /* tp_itemsize */
2074 0, /* tp_itemsize */
2089 (destructor)index_dealloc, /* tp_dealloc */
2075 (destructor)index_dealloc, /* tp_dealloc */
2090 0, /* tp_print */
2076 0, /* tp_print */
2091 0, /* tp_getattr */
2077 0, /* tp_getattr */
2092 0, /* tp_setattr */
2078 0, /* tp_setattr */
2093 0, /* tp_compare */
2079 0, /* tp_compare */
2094 0, /* tp_repr */
2080 0, /* tp_repr */
2095 0, /* tp_as_number */
2081 0, /* tp_as_number */
2096 &index_sequence_methods, /* tp_as_sequence */
2082 &index_sequence_methods, /* tp_as_sequence */
2097 &index_mapping_methods, /* tp_as_mapping */
2083 &index_mapping_methods, /* tp_as_mapping */
2098 0, /* tp_hash */
2084 0, /* tp_hash */
2099 0, /* tp_call */
2085 0, /* tp_call */
2100 0, /* tp_str */
2086 0, /* tp_str */
2101 0, /* tp_getattro */
2087 0, /* tp_getattro */
2102 0, /* tp_setattro */
2088 0, /* tp_setattro */
2103 0, /* tp_as_buffer */
2089 0, /* tp_as_buffer */
2104 Py_TPFLAGS_DEFAULT, /* tp_flags */
2090 Py_TPFLAGS_DEFAULT, /* tp_flags */
2105 "revlog index", /* tp_doc */
2091 "revlog index", /* tp_doc */
2106 0, /* tp_traverse */
2092 0, /* tp_traverse */
2107 0, /* tp_clear */
2093 0, /* tp_clear */
2108 0, /* tp_richcompare */
2094 0, /* tp_richcompare */
2109 0, /* tp_weaklistoffset */
2095 0, /* tp_weaklistoffset */
2110 0, /* tp_iter */
2096 0, /* tp_iter */
2111 0, /* tp_iternext */
2097 0, /* tp_iternext */
2112 index_methods, /* tp_methods */
2098 index_methods, /* tp_methods */
2113 0, /* tp_members */
2099 0, /* tp_members */
2114 index_getset, /* tp_getset */
2100 index_getset, /* tp_getset */
2115 0, /* tp_base */
2101 0, /* tp_base */
2116 0, /* tp_dict */
2102 0, /* tp_dict */
2117 0, /* tp_descr_get */
2103 0, /* tp_descr_get */
2118 0, /* tp_descr_set */
2104 0, /* tp_descr_set */
2119 0, /* tp_dictoffset */
2105 0, /* tp_dictoffset */
2120 (initproc)index_init, /* tp_init */
2106 (initproc)index_init, /* tp_init */
2121 0, /* tp_alloc */
2107 0, /* tp_alloc */
2122 };
2108 };
2123
2109
/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &indexType);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}

void revlog_module_init(PyObject *mod)
{
	indexType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&indexType) < 0)
		return;
	Py_INCREF(&indexType);
	PyModule_AddObject(mod, "index", (PyObject *)&indexType);

	nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
				  -1, -1, -1, -1, nullid, 20);
	if (nullentry)
		PyObject_GC_UnTrack(nullentry);
}
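
The C half of this change swaps the METH_VARARGS `insert` entry in the method table above for a METH_O `append`, and the pure-Python index below changes the same way. A minimal sketch of the before/after call pattern (not part of the changeset; it assumes a Mercurial checkout of this vintage on the import path):

    # Sketch only: exercises the new append(entry) API on the pure-Python index.
    # Entry fields follow indexformatng: (offset_flags, comp_len, uncomp_len,
    # base_rev, link_rev, p1_rev, p2_rev, node).
    from mercurial.node import nullid
    from mercurial.pure.parsers import parse_index2

    index, cache = parse_index2(b'', False)   # empty, non-inline index
    entry = (0, 0, 0, -1, 0, -1, -1, nullid)

    # before: index.insert(-1, entry)  -- -1 was the only supported position
    index.append(entry)                       # after this changeset
    assert index[0] == entry
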
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -1,109 +1,109 @@
# policy.py - module policy logic for Mercurial.
#
# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import sys

# Rules for how modules can be loaded. Values are:
#
#    c - require C extensions
#    allow - allow pure Python implementation when C loading fails
#    cffi - required cffi versions (implemented within pure module)
#    cffi-allow - allow pure Python implementation if cffi version is missing
#    py - only load pure Python modules
#
# By default, fall back to the pure modules so the in-place build can
# run without recompiling the C extensions. This will be overridden by
# __modulepolicy__ generated by setup.py.
policy = b'allow'
_packageprefs = {
    # policy: (versioned package, pure package)
    b'c': (r'cext', None),
    b'allow': (r'cext', r'pure'),
    b'cffi': (r'cffi', None),
    b'cffi-allow': (r'cffi', r'pure'),
    b'py': (None, r'pure'),
}

try:
    from . import __modulepolicy__
    policy = __modulepolicy__.modulepolicy
except ImportError:
    pass

# PyPy doesn't load C extensions.
#
# The canonical way to do this is to test platform.python_implementation().
# But we don't import platform and don't bloat for it here.
if r'__pypy__' in sys.builtin_module_names:
    policy = b'cffi'

# Environment variable can always force settings.
if sys.version_info[0] >= 3:
    if r'HGMODULEPOLICY' in os.environ:
        policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
else:
    policy = os.environ.get(r'HGMODULEPOLICY', policy)

def _importfrom(pkgname, modname):
    # from .<pkgname> import <modname> (where . is looked through this module)
    fakelocals = {}
    pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
    try:
        fakelocals[modname] = mod = getattr(pkg, modname)
    except AttributeError:
        raise ImportError(r'cannot import name %s' % modname)
    # force import; fakelocals[modname] may be replaced with the real module
    getattr(mod, r'__doc__', None)
    return fakelocals[modname]

# keep in sync with "version" in C modules
_cextversions = {
    (r'cext', r'base85'): 1,
    (r'cext', r'bdiff'): 3,
    (r'cext', r'mpatch'): 1,
    (r'cext', r'osutil'): 4,
-    (r'cext', r'parsers'): 5,
+    (r'cext', r'parsers'): 6,
}

# map import request to other package or module
_modredirects = {
    (r'cext', r'charencode'): (r'cext', r'parsers'),
    (r'cffi', r'base85'): (r'pure', r'base85'),
    (r'cffi', r'charencode'): (r'pure', r'charencode'),
    (r'cffi', r'parsers'): (r'pure', r'parsers'),
}

def _checkmod(pkgname, modname, mod):
    expected = _cextversions.get((pkgname, modname))
    actual = getattr(mod, r'version', None)
    if actual != expected:
        raise ImportError(r'cannot import module %s.%s '
                          r'(expected version: %d, actual: %r)'
                          % (pkgname, modname, expected, actual))

def importmod(modname):
    """Import module according to policy and check API version"""
    try:
        verpkg, purepkg = _packageprefs[policy]
    except KeyError:
        raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
    assert verpkg or purepkg
    if verpkg:
        pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
        try:
            mod = _importfrom(pn, mn)
            if pn == verpkg:
                _checkmod(pn, mn, mod)
            return mod
        except ImportError:
            if not purepkg:
                raise
    pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
    return _importfrom(pn, mn)
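
Because the changeset removes `index_insert` from the C module's API, the expected `cext.parsers` version is bumped from 5 to 6 above; `_checkmod` compares that against the compiled module's `version` attribute at import time. A small usage sketch (hedged: it assumes a built Mercurial tree on the path and that `mercurial.policy` has not been imported yet; the names used are exactly those shown above):

    # Sketch only: force a policy, then import parsers through the policy layer.
    import os
    os.environ[r'HGMODULEPOLICY'] = r'allow'  # read when mercurial.policy loads

    from mercurial import policy
    parsers = policy.importmod(r'parsers')

    # Under 'allow', a stale cext.parsers (version != 6) raises ImportError in
    # _checkmod and importmod falls back to mercurial.pure.parsers.
    print(parsers.__name__)
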
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -1,177 +1,176 @@
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import zlib

from ..node import nullid
from .. import pycompat
stringio = pycompat.bytesio


_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    # x is a tuple
    return x

indexformatng = ">Qiiiiii20s12x"
indexfirst = struct.calcsize('Q')
sizeint = struct.calcsize('i')
indexsize = struct.calcsize(indexformatng)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return int(int(offset) << 16 | type)

class BaseIndexObject(object):
    def __len__(self):
        return self._lgt + len(self._extra) + 1

-    def insert(self, i, tup):
-        assert i == -1
+    def append(self, tup):
        self._extra.append(tup)

    def _fix_index(self, i):
        if not isinstance(i, int):
            raise TypeError("expecting int indexes")
        if i < 0 or i >= len(self):
            raise IndexError
        return i

    def __getitem__(self, i):
        if i == -1 or i == len(self) - 1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        i = self._fix_index(i)
        if i >= self._lgt:
            return self._extra[i - self._lgt]
        index = self._calculate_index(i)
        r = struct.unpack(indexformatng, self._data[index:index + indexsize])
        if i == 0:
            e = list(r)
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            return tuple(e)
        return r

class IndexObject(BaseIndexObject):
    def __init__(self, data):
        assert len(data) % indexsize == 0
        self._data = data
        self._lgt = len(data) // indexsize
        self._extra = []

    def _calculate_index(self, i):
        return i * indexsize

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError("deleting slices only supports a:-1 with step 1")
        i = self._fix_index(i.start)
        if i < self._lgt:
            self._data = self._data[:i * indexsize]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[:i - self._lgt]

class InlinedIndexObject(BaseIndexObject):
    def __init__(self, data, inline=0):
        self._data = data
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
        while off <= len(self._data) - indexsize:
            s, = struct.unpack('>i',
                self._data[off + indexfirst:off + sizeint + indexfirst])
            if lgt is not None:
                self._offsets[count] = off
            count += 1
            off += indexsize + s
        if off != len(self._data):
            raise ValueError("corrupted data")
        return count

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError("deleting slices only supports a:-1 with step 1")
        i = self._fix_index(i.start)
        if i < self._lgt:
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[:i - self._lgt]

    def _calculate_index(self, i):
        return self._offsets[i]

def parse_index2(data, inline):
    if not inline:
        return IndexObject(data), None
    return InlinedIndexObject(data, inline), (0, data)

def parse_dirstate(dmap, copymap, st):
    parents = [st[:20], st[20: 40]]
    # dereference fields so they will be local in loop
    format = ">cllll"
    e_size = struct.calcsize(format)
    pos1 = 40
    l = len(st)

    # the inner loop
    while pos1 < l:
        pos2 = pos1 + e_size
        e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
        pos1 = pos2 + e[4]
        f = st[pos2:pos1]
        if '\0' in f:
            f, c = f.split('\0')
            copymap[f] = c
        dmap[f] = e[:4]
    return parents

def pack_dirstate(dmap, copymap, pl, now):
    now = int(now)
    cs = stringio()
    write = cs.write
    write("".join(pl))
    for f, e in dmap.iteritems():
        if e[0] == 'n' and e[3] == now:
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            e = dirstatetuple(e[0], e[1], e[2], -1)
            dmap[f] = e

        if f in copymap:
            f = "%s\0%s" % (f, copymap[f])
        e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
        write(e)
        write(f)
    return cs.getvalue()
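
Both `__delitem__` implementations above accept only a trailing slice, `del index[i:-1]` with implicit step 1, mirroring the C `index_slice_del`: the index may grow by `append()` and shrink only from the tail. A short sketch of that contract (illustration only, not from the changeset):

    from mercurial.node import nullid
    from mercurial.pure.parsers import parse_index2

    index, _cache = parse_index2(b'', False)
    for linkrev in range(3):
        index.append((0, 0, 0, -1, linkrev, -1, -1, nullid))

    del index[1:-1]          # strip revision 1 and everything after it
    assert len(index) == 2   # one remaining rev plus the nullid sentinel

    try:
        del index[0:2]       # any other slice shape is rejected
    except ValueError as err:
        assert 'a:-1' in str(err)
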
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2971 +1,2971 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

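A worked check (a sketch, not Mercurial API) of the node-hash scheme defined in `hash()` above: SHA-1 over the two parent nodes in sorted order followed by the text, with the `_nullhash.copy()` branch acting purely as a fast path:

    import hashlib

    nullid = b"\0" * 20

    def node_hash(text, p1, p2):
        a, b = sorted((p1, p2))  # order parents so the hash is symmetric
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    # nullid (all zero bytes) always sorts first, so the p2 == nullid fast
    # path above computes exactly sha1(nullid + p1 + text):
    p1 = hashlib.sha1(b"parent").digest()
    expected = hashlib.sha1(nullid + p1 + b"content").digest()
    assert node_hash(b"content", p1, nullid) == expected
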
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 [[0], [11], [13], [15]]
349 [[0], [11], [13], [15]]
350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 [[0], [11], [13, 15]]
351 [[0], [11], [13, 15]]
352 """
352 """
353 if targetsize is not None:
353 if targetsize is not None:
354 targetsize = max(targetsize, revlog._srmingapsize)
354 targetsize = max(targetsize, revlog._srmingapsize)
355 # targetsize should not be specified when evaluating delta candidates:
355 # targetsize should not be specified when evaluating delta candidates:
356 # * targetsize is used to ensure we stay within specification when reading,
356 # * targetsize is used to ensure we stay within specification when reading,
357 # * deltainfo is used to pick are good delta chain when writing.
357 # * deltainfo is used to pick are good delta chain when writing.
358 if not (deltainfo is None or targetsize is None):
358 if not (deltainfo is None or targetsize is None):
359 msg = 'cannot use `targetsize` with a `deltainfo`'
359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 raise error.ProgrammingError(msg)
360 raise error.ProgrammingError(msg)
361 for chunk in _slicechunktodensity(revlog, revs,
361 for chunk in _slicechunktodensity(revlog, revs,
362 deltainfo,
362 deltainfo,
363 revlog._srdensitythreshold,
363 revlog._srdensitythreshold,
364 revlog._srmingapsize):
364 revlog._srmingapsize):
365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 yield subchunk
366 yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that the density slicing selected,
    but that are still too large compared to the read guarantee of revlogs.
    This might happen when the "minimal gap size" interrupted the slicing, or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
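
# Worked example for the density slicing above, using the doctest revlog:
# for revs [0, 11, 15] the payload is 5 + 3 + 5 = 13 bytes while the span is
# end(15) - start(0) = 91 bytes, a density of ~0.14. That is below the
# default 0.5 target, so the two largest gaps are skipped and the chunk is
# cut into [[0], [11], [15]].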

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
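
# Field summary for _deltainfo, as computed in _deltacomputer._builddeltainfo
# below:
#  - distance: byte span from the start of the chain base to the end of the
#    new delta (deltalen + offset - start(chainbase))
#  - deltalen: size of the compressed delta, header included
#  - data: the (header, data) pair returned by revlog.compress()
#  - base: revision this delta is stored against
#  - chainbase: first revision of the resulting delta chain
#  - chainlen: length of the delta chain, including the new revision
#  - compresseddeltalen: total compressed size of the whole chain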

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth diffing against, grouped by level
        of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude any base already tested lazily above
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)
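
    # In short, candidate groups are tried in this order: the base of the
    # delta received over the wire (when lazydeltabase applies), then the
    # closest parent followed by the other one (or both parents at once when
    # delta-both-parents is enabled), and finally the previous revision as a
    # last resort before falling back to a fulltext.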

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the delta info of the first acceptable candidate revision,
        as ordered by _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
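
    # Note: the first candidate group that yields an acceptable delta wins;
    # within that group the smallest compressed delta is kept, and later
    # groups are never evaluated.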

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
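
# A minimal sketch (hypothetical helper, not part of the module's API) of how
# one v0 entry round-trips through the struct above; the field values are
# made up for illustration.
def _indexv0_example():
    entry = indexformatv0_pack(0, 42, 0, 0, b'\0' * 20, b'\0' * 20,
                               b'\x11' * 20)
    # 4 longs + 3 20-byte node fields = 76 bytes per entry
    assert indexformatv0.size == len(entry) == 4 * 4 + 3 * 20
    offset, clen, baserev, linkrev, p1, p2, node = indexformatv0_unpack(entry)
    return offset, clen, baserev, linkrev, p1, p2, node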

class revlogoldindex(list):
    def __len__(self):
        return list.__len__(self) + 1
    def __getitem__(self, i):
        if i == -1 or i == len(self) - 1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)
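
# The phantom entry returned above stands in for the null revision: it makes
# the index one entry longer than the underlying list, which is why
# revlog.__len__ subtracts one and tip() looks at len(self.index) - 2.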

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
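
# A minimal sketch (hypothetical helper) of the packing behind the first
# index-ng field: a 48-bit offset and 16 flag bits share one 64-bit integer,
# which the start() and flags() accessors of revlog take apart again. On
# disk, the first entry's leading 4 bytes are overwritten with the version
# word (see revlogio.packentry).
def _indexng_offset_example():
    offset, flags = 12345, 0x1
    packed = (offset << 16) | flags  # what offset_type(offset, flags) builds
    assert packed >> 16 == offset
    assert packed & 0xFFFF == flags
    # Q + 6 ints + 20-byte node + 12 pad bytes = 64 bytes per entry
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12
    return packed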

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        #  When True, indexfile is opened with checkambig=True at writing, to
        #  avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
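
    # For example (assuming a revlog with at least three revisions),
    # revs(0, 2) iterates 0, 1, 2 and revs(2, 0) iterates 2, 1, 0: the stop
    # bound is inclusive and iteration runs backwards when start > stop.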

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
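
    # The pure-Python fallback above scans the index backwards from the last
    # position probed (_nodepos), filling _nodecache as it goes, so repeated
    # lookups tend to hit the cache rather than rescan the whole index.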
1154
1154
1155 # Accessors for index entries.
1155 # Accessors for index entries.
1156
1156
1157 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1157 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1158 # are flags.
1158 # are flags.
1159 def start(self, rev):
1159 def start(self, rev):
1160 return int(self.index[rev][0] >> 16)
1160 return int(self.index[rev][0] >> 16)
1161
1161
1162 def flags(self, rev):
1162 def flags(self, rev):
1163 return self.index[rev][0] & 0xFFFF
1163 return self.index[rev][0] & 0xFFFF
1164
1164
1165 def length(self, rev):
1165 def length(self, rev):
1166 return self.index[rev][1]
1166 return self.index[rev][1]
1167
1167
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

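    # Illustrative example (not part of the original source): index[rev][3]
    # holds each revision's delta base, and a chain base stores itself as its
    # own base. If index[5][3] == 3 and index[3][3] == 3, then chainbase(5)
    # walks 5 -> 3, sees 3 point at itself, and returns 3.
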
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

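    # Worked example (illustrative): for a generaldelta chain 0 <- 1 <- 2
    # where revs 1 and 2 each store a 10-byte delta and the base rev 0 stores
    # 60 bytes on disk, _chaininfo(2) counts two deltas (clen == 2) and sums
    # 10 + 10 + 60 == 80 bytes of data to decompress, returning (2, 80).
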
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

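    # Worked example (illustrative): with a generaldelta chain 0 <- 1 <- 2,
    # _deltachain(2) walks 2 -> 1 -> 0 and returns ([0, 1, 2], False), while
    # _deltachain(2, stoprev=1) halts as soon as the walk reaches rev 1 and
    # returns ([2], True) -- the stop revision itself is excluded.
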
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

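    # Illustrative example: for a linear history 0 - 1 - 2, descendants([0])
    # yields 1 and then 2; rev 0 itself is not yielded because a revision is
    # not considered its own descendant.
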
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

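    # Illustrative example: in a linear history 0 - 1 - 2, calling
    # findcommonmissing(common=[node(1)], heads=[node(2)]) returns a lazy set
    # covering ::1 (revs 0 and 1, plus nullrev) and the missing list
    # [node(2)], i.e. (::heads) - (::common).
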
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

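    # Illustrative example: in a linear history 0 - 1 - 2,
    # nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): both endpoints are included
    # because every node counts as its own ancestor and descendant.
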
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

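    # Illustrative example: for a history where rev 1 has two children,
    # revs 2 and 3, heads() returns [node(2), node(3)], while
    # heads(start=node(2)) returns only [node(2)] because rev 3 is not a
    # descendant of rev 2.
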
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def descendant(self, a, b):
        msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
        util.nouideprecwarn(msg, '4.7')
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

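    # Illustrative note: the a > b shortcut is sound because a revision's
    # number is always greater than those of all its ancestors. For a linear
    # history 0 - 1 - 2, isancestorrev(0, 2) is True because the common
    # ancestor heads of revs 0 and 2 are {0}.
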
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

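    # Illustrative examples of the forms _match() accepts: an int rev such as
    # 5; a 20-byte binary node; a decimal string like '5' or '-1' (a negative
    # value counts back from the tip, so '-1' resolves to the last revision);
    # or a full 40-character hex nodeid. Anything else falls through to
    # _partialmatch() via lookup().
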
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

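    # Worked example (illustrative, assuming _chunkcachesize == 65536): a
    # request for offset=70000, length=100 is widened to the aligned window
    # realoffset=65536, reallength=65536, so the whole 64 KiB block is read
    # and cached, and a zero-copy buffer of the 100 requested bytes is
    # returned from it.
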
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

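    # Illustrative note: for an inline revlog the data chunks are interleaved
    # with the fixed-size index entries in the same file, so rev N's data is
    # preceded by N + 1 index records. With self._io.size == 64, the data for
    # rev 0 at logical offset 0 actually starts at byte 0 + 1 * 64 == 64.
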
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

2057 def _chunkclear(self):
2057 def _chunkclear(self):
2058 """Clear the raw chunk cache."""
2058 """Clear the raw chunk cache."""
2059 self._chunkcache = (0, '')
2059 self._chunkcache = (0, '')
2060
2060
2061 def deltaparent(self, rev):
2061 def deltaparent(self, rev):
2062 """return deltaparent of the given revision"""
2062 """return deltaparent of the given revision"""
2063 base = self.index[rev][3]
2063 base = self.index[rev][3]
2064 if base == rev:
2064 if base == rev:
2065 return nullrev
2065 return nullrev
2066 elif self._generaldelta:
2066 elif self._generaldelta:
2067 return base
2067 return base
2068 else:
2068 else:
2069 return rev - 1
2069 return rev - 1
2070
2070
2071 def revdiff(self, rev1, rev2):
2071 def revdiff(self, rev1, rev2):
2072 """return or calculate a delta between two revisions
2072 """return or calculate a delta between two revisions
2073
2073
2074 The delta calculated is in binary form and is intended to be written to
2074 The delta calculated is in binary form and is intended to be written to
2075 revlog data directly. So this function needs raw revision data.
2075 revlog data directly. So this function needs raw revision data.
2076 """
2076 """
2077 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2077 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2078 return bytes(self._chunk(rev2))
2078 return bytes(self._chunk(rev2))
2079
2079
2080 return mdiff.textdiff(self.revision(rev1, raw=True),
2080 return mdiff.textdiff(self.revision(rev1, raw=True),
2081 self.revision(rev2, raw=True))
2081 self.revision(rev2, raw=True))
2082
2082
2083 def revision(self, nodeorrev, _df=None, raw=False):
2083 def revision(self, nodeorrev, _df=None, raw=False):
2084 """return an uncompressed revision of a given node or revision
2084 """return an uncompressed revision of a given node or revision
2085 number.
2085 number.
2086
2086
2087 _df - an existing file handle to read from. (internal-only)
2087 _df - an existing file handle to read from. (internal-only)
2088 raw - an optional argument specifying if the revision data is to be
2088 raw - an optional argument specifying if the revision data is to be
2089 treated as raw data when applying flag transforms. 'raw' should be set
2089 treated as raw data when applying flag transforms. 'raw' should be set
2090 to True when generating changegroups or in debug commands.
2090 to True when generating changegroups or in debug commands.
2091 """
2091 """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2); however, subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        Useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.
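        # Worked example (illustrative): with textlen = 1000 and
        # _maxdeltachainspan = 2000, the effective span limit below is
        # max(2000, 4 * 1000) = 4000 bytes. A candidate delta is then rejected
        # if its chain spans more than 4000 bytes of store, if the delta itself
        # is larger than 1000 bytes, if the deltas to apply sum to more than
        # 2000 bytes, or if the chain exceeds _maxchainlen links.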
        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize
        if (distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """
        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
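        # Expected layout of a full-replacement delta (sketch; the three
        # ">lll" header fields are the bdiff hunk bounds and length):
        #
        #   [12-byte ">lll" header]["\1\ncensored:" ...metadata... new text]
        #
        # Anything shaped differently cannot be a censored revision, so the
        # checks below can bail out early.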
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
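        # Worked example (illustrative): with revs 0..4, linkrevs equal to the
        # rev numbers, and minlink = 3, the walk pops revs 4 and 3 (both have
        # linkrev >= minlink, so they leave futurelargelinkrevs), then stops
        # with strippoint == 3 and brokenrevs empty.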
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If left at ``None``, the destination revlog's existing setting is kept.
        """
2895 if deltareuse not in self.DELTAREUSEALL:
2895 if deltareuse not in self.DELTAREUSEALL:
2896 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2896 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2897
2897
2898 if len(destrevlog):
2898 if len(destrevlog):
2899 raise ValueError(_('destination revlog is not empty'))
2899 raise ValueError(_('destination revlog is not empty'))
2900
2900
2901 if getattr(self, 'filteredrevs', None):
2901 if getattr(self, 'filteredrevs', None):
2902 raise ValueError(_('source revlog has filtered revisions'))
2902 raise ValueError(_('source revlog has filtered revisions'))
2903 if getattr(destrevlog, 'filteredrevs', None):
2903 if getattr(destrevlog, 'filteredrevs', None):
2904 raise ValueError(_('destination revlog has filtered revisions'))
2904 raise ValueError(_('destination revlog has filtered revisions'))
2905
2905
2906 # lazydeltabase controls whether to reuse a cached delta, if possible.
2906 # lazydeltabase controls whether to reuse a cached delta, if possible.
2907 oldlazydeltabase = destrevlog._lazydeltabase
2907 oldlazydeltabase = destrevlog._lazydeltabase
2908 oldamd = destrevlog._deltabothparents
2908 oldamd = destrevlog._deltabothparents
2909
2909
2910 try:
2910 try:
2911 if deltareuse == self.DELTAREUSEALWAYS:
2911 if deltareuse == self.DELTAREUSEALWAYS:
2912 destrevlog._lazydeltabase = True
2912 destrevlog._lazydeltabase = True
2913 elif deltareuse == self.DELTAREUSESAMEREVS:
2913 elif deltareuse == self.DELTAREUSESAMEREVS:
2914 destrevlog._lazydeltabase = False
2914 destrevlog._lazydeltabase = False
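# For DELTAREUSENEVER and DELTAREUSEFULLADD, _lazydeltabase keeps the
# destination's existing value; populatecachedelta below is False for
# those policies, so no cached delta is offered for reuse anyway.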
2915
2915
2916 destrevlog._deltabothparents = deltabothparents or oldamd
2916 destrevlog._deltabothparents = deltabothparents or oldamd
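# Because of 'or', an explicit deltabothparents=False also falls back
# to the destination's previous setting; only a truthy value overrides.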
2917
2917
2918 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2918 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2919 self.DELTAREUSESAMEREVS)
2919 self.DELTAREUSESAMEREVS)
2920
2920
2921 deltacomputer = _deltacomputer(destrevlog)
2921 deltacomputer = _deltacomputer(destrevlog)
2922 index = self.index
2922 index = self.index
2923 for rev in self:
2923 for rev in self:
2924 entry = index[rev]
2924 entry = index[rev]
2925
2925
2926 # Some classes override linkrev to take filtered revs into
2926 # Some classes override linkrev to take filtered revs into
2927 # account. Use raw entry from index.
2927 # account. Use raw entry from index.
2928 flags = entry[0] & 0xffff
2928 flags = entry[0] & 0xffff
2929 linkrev = entry[4]
2929 linkrev = entry[4]
2930 p1 = index[entry[5]][7]
2930 p1 = index[entry[5]][7]
2931 p2 = index[entry[6]][7]
2931 p2 = index[entry[6]][7]
2932 node = entry[7]
2932 node = entry[7]
2933
2933
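# Layout of a raw index entry (the 8-tuple read above):
#   entry[0] = (data offset << 16) | flags (low 16 bits hold the flags)
#   entry[1] = compressed chunk size, entry[2] = uncompressed size
#   entry[3] = delta-chain base rev, entry[4] = linkrev
#   entry[5]/entry[6] = p1/p2 revs, entry[7] = binary node id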
2934 # (Possibly) reuse the delta from the revlog if allowed and
2934 # (Possibly) reuse the delta from the revlog if allowed and
2935 # the revlog chunk is a delta.
2935 # the revlog chunk is a delta.
2936 cachedelta = None
2936 cachedelta = None
2937 rawtext = None
2937 rawtext = None
2938 if populatecachedelta:
2938 if populatecachedelta:
2939 dp = self.deltaparent(rev)
2939 dp = self.deltaparent(rev)
2940 if dp != nullrev:
2940 if dp != nullrev:
2941 cachedelta = (dp, bytes(self._chunk(rev)))
2941 cachedelta = (dp, bytes(self._chunk(rev)))
2942
2942
2943 if not cachedelta:
2943 if not cachedelta:
2944 rawtext = self.revision(rev, raw=True)
2944 rawtext = self.revision(rev, raw=True)
2945
2945
2946
2946
2947 if deltareuse == self.DELTAREUSEFULLADD:
2947 if deltareuse == self.DELTAREUSEFULLADD:
2948 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2948 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2949 cachedelta=cachedelta,
2949 cachedelta=cachedelta,
2950 node=node, flags=flags,
2950 node=node, flags=flags,
2951 deltacomputer=deltacomputer)
2951 deltacomputer=deltacomputer)
2952 else:
2952 else:
2953 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2953 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2954 checkambig=False)
2954 checkambig=False)
2955 dfh = None
2955 dfh = None
2956 if not destrevlog._inline:
2956 if not destrevlog._inline:
2957 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2957 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2958 try:
2958 try:
2959 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2959 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2960 p2, flags, cachedelta, ifh, dfh,
2960 p2, flags, cachedelta, ifh, dfh,
2961 deltacomputer=deltacomputer)
2961 deltacomputer=deltacomputer)
2962 finally:
2962 finally:
2963 if dfh:
2963 if dfh:
2964 dfh.close()
2964 dfh.close()
2965 ifh.close()
2965 ifh.close()
2966
2966
2967 if addrevisioncb:
2967 if addrevisioncb:
2968 addrevisioncb(self, rev, node)
2968 addrevisioncb(self, rev, node)
2969 finally:
2969 finally:
2970 destrevlog._lazydeltabase = oldlazydeltabase
2970 destrevlog._lazydeltabase = oldlazydeltabase
2971 destrevlog._deltabothparents = oldamd
2971 destrevlog._deltabothparents = oldamd
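Taken together, the policy constants and the code above admit a compact usage pattern. A minimal sketch, assuming (as in the Mercurial revlog API of this period) that the method shown is ``revlog.clone(tr, destrevlog, addrevisioncb=None, deltareuse=..., deltabothparents=None)``; ``repo``, ``srcrevlog`` and ``destrevlog`` are illustrative names:

    # Copy srcrevlog into an empty destrevlog, recomputing deltas only
    # where the destination would pick a different base (the default
    # DELTAREUSESAMEREVS policy). DELTAREUSEFULLADD, seen in the code
    # above, instead re-adds full texts through addrevision().
    with repo.transaction('clonerevlog') as tr:
        srcrevlog.clone(tr, destrevlog,
                        deltareuse=srcrevlog.DELTAREUSESAMEREVS,
                        deltabothparents=None)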
@@ -1,263 +1,263
1 # unionrepo.py - repository class for viewing union of repository changesets
1 # unionrepo.py - repository class for viewing union of repository changesets
2 #
2 #
3 # Derived from bundlerepo.py
3 # Derived from bundlerepo.py
4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Repository class for "in-memory pull" of one local repository to another,
10 """Repository class for "in-memory pull" of one local repository to another,
11 allowing operations like diff and log with revsets.
11 allowing operations like diff and log with revsets.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 from .i18n import _
16 from .i18n import _
17 from .node import nullid
17 from .node import nullid
18
18
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 cmdutil,
21 cmdutil,
22 error,
22 error,
23 filelog,
23 filelog,
24 localrepo,
24 localrepo,
25 manifest,
25 manifest,
26 mdiff,
26 mdiff,
27 pathutil,
27 pathutil,
28 pycompat,
28 pycompat,
29 revlog,
29 revlog,
30 util,
30 util,
31 vfs as vfsmod,
31 vfs as vfsmod,
32 )
32 )
33
33
34 class unionrevlog(revlog.revlog):
34 class unionrevlog(revlog.revlog):
35 def __init__(self, opener, indexfile, revlog2, linkmapper):
35 def __init__(self, opener, indexfile, revlog2, linkmapper):
36 # How it works:
36 # How it works:
37 # To retrieve a revision, we just need to know the node id so we can
37 # To retrieve a revision, we just need to know the node id so we can
38 # look it up in revlog2.
38 # look it up in revlog2.
39 #
39 #
40 # To differentiate a rev in the second revlog from a rev in the revlog,
40 # To differentiate a rev in the second revlog from a rev in the revlog,
41 # we check revision against repotiprev.
41 # we check revision against repotiprev.
42 opener = vfsmod.readonlyvfs(opener)
42 opener = vfsmod.readonlyvfs(opener)
43 revlog.revlog.__init__(self, opener, indexfile)
43 revlog.revlog.__init__(self, opener, indexfile)
44 self.revlog2 = revlog2
44 self.revlog2 = revlog2
45
45
46 n = len(self)
46 n = len(self)
47 self.repotiprev = n - 1
47 self.repotiprev = n - 1
48 self.bundlerevs = set() # used by 'bundle()' revset expression
48 self.bundlerevs = set() # used by 'bundle()' revset expression
49 for rev2 in self.revlog2:
49 for rev2 in self.revlog2:
50 rev = self.revlog2.index[rev2]
50 rev = self.revlog2.index[rev2]
51 # 'rev' is an index entry from revlog2; its rev numbering differs from self's
51 # 'rev' is an index entry from revlog2; its rev numbering differs from self's
52 _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
52 _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
53 flags = _start & 0xFFFF
53 flags = _start & 0xFFFF
54
54
55 if linkmapper is None: # link is to same revlog
55 if linkmapper is None: # link is to same revlog
56 assert linkrev == rev2 # we never link back
56 assert linkrev == rev2 # we never link back
57 link = n
57 link = n
58 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
58 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
59 link = linkmapper(linkrev)
59 link = linkmapper(linkrev)
60
60
61 if linkmapper is not None: # link is to the other revlog; map base rev too
61 if linkmapper is not None: # link is to the other revlog; map base rev too
62 base = linkmapper(base)
62 base = linkmapper(base)
63
63
64 if node in self.nodemap:
64 if node in self.nodemap:
65 # this happens for the common revlog revisions
65 # this happens for the common revlog revisions
66 self.bundlerevs.add(self.nodemap[node])
66 self.bundlerevs.add(self.nodemap[node])
67 continue
67 continue
68
68
69 p1node = self.revlog2.node(p1rev)
69 p1node = self.revlog2.node(p1rev)
70 p2node = self.revlog2.node(p2rev)
70 p2node = self.revlog2.node(p2rev)
71
71
72 # TODO: it's probably wrong to set compressed length to None, but
72 # TODO: it's probably wrong to set compressed length to None, but
73 # I have no idea if csize is valid in the base revlog context.
73 # I have no idea if csize is valid in the base revlog context.
74 e = (flags, None, rsize, base,
74 e = (flags, None, rsize, base,
75 link, self.rev(p1node), self.rev(p2node), node)
75 link, self.rev(p1node), self.rev(p2node), node)
76 self.index.insert(-1, e)
76 self.index.append(e)
77 self.nodemap[node] = n
77 self.nodemap[node] = n
78 self.bundlerevs.add(n)
78 self.bundlerevs.add(n)
79 n += 1
79 n += 1
80
80
81 def _chunk(self, rev):
81 def _chunk(self, rev):
82 if rev <= self.repotiprev:
82 if rev <= self.repotiprev:
83 return revlog.revlog._chunk(self, rev)
83 return revlog.revlog._chunk(self, rev)
84 return self.revlog2._chunk(self.node(rev))
84 return self.revlog2._chunk(self.node(rev))
85
85
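The test in ``_chunk`` is the dispatch rule used throughout ``unionrevlog``: revisions numbered 0..repotiprev live in the base revlog, while higher numbers were appended from ``revlog2`` and must be re-addressed there via their node id. A condensed sketch (``_whichlog`` is an illustrative helper, not part of the class):

    def _whichlog(self, rev):
        # revs up to repotiprev come from the base revlog; later revs
        # were appended from revlog2 and are found there by node id
        if rev <= self.repotiprev:
            return ('base', rev)
        return ('revlog2', self.revlog2.rev(self.node(rev)))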
86 def revdiff(self, rev1, rev2):
86 def revdiff(self, rev1, rev2):
87 """return or calculate a delta between two revisions"""
87 """return or calculate a delta between two revisions"""
88 if rev1 > self.repotiprev and rev2 > self.repotiprev:
88 if rev1 > self.repotiprev and rev2 > self.repotiprev:
89 return self.revlog2.revdiff(
89 return self.revlog2.revdiff(
90 self.revlog2.rev(self.node(rev1)),
90 self.revlog2.rev(self.node(rev1)),
91 self.revlog2.rev(self.node(rev2)))
91 self.revlog2.rev(self.node(rev2)))
92 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
92 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
93 return self.baserevdiff(rev1, rev2)
93 return self.baserevdiff(rev1, rev2)
94
94
95 return mdiff.textdiff(self.revision(rev1), self.revision(rev2))
95 return mdiff.textdiff(self.revision(rev1), self.revision(rev2))
96
96
97 def revision(self, nodeorrev, _df=None, raw=False):
97 def revision(self, nodeorrev, _df=None, raw=False):
98 """return an uncompressed revision of a given node or revision
98 """return an uncompressed revision of a given node or revision
99 number.
99 number.
100 """
100 """
101 if isinstance(nodeorrev, int):
101 if isinstance(nodeorrev, int):
102 rev = nodeorrev
102 rev = nodeorrev
103 node = self.node(rev)
103 node = self.node(rev)
104 else:
104 else:
105 node = nodeorrev
105 node = nodeorrev
106 rev = self.rev(node)
106 rev = self.rev(node)
107
107
108 if node == nullid:
108 if node == nullid:
109 return ""
109 return ""
110
110
111 if rev > self.repotiprev:
111 if rev > self.repotiprev:
112 text = self.revlog2.revision(node)
112 text = self.revlog2.revision(node)
113 self._cache = (node, rev, text)
113 self._cache = (node, rev, text)
114 else:
114 else:
115 text = self.baserevision(rev)
115 text = self.baserevision(rev)
116 # baserevision() already populated self._cache
116 # baserevision() already populated self._cache
117 return text
117 return text
118
118
119 def baserevision(self, nodeorrev):
119 def baserevision(self, nodeorrev):
120 # Revlog subclasses may override 'revision' method to modify format of
120 # Revlog subclasses may override 'revision' method to modify format of
121 # content retrieved from revlog. To use unionrevlog with such class one
121 # content retrieved from revlog. To use unionrevlog with such class one
122 # needs to override 'baserevision' and make more specific call here.
122 # needs to override 'baserevision' and make more specific call here.
123 return revlog.revlog.revision(self, nodeorrev)
123 return revlog.revlog.revision(self, nodeorrev)
124
124
125 def baserevdiff(self, rev1, rev2):
125 def baserevdiff(self, rev1, rev2):
126 # Exists for the same purpose as baserevision.
126 # Exists for the same purpose as baserevision.
127 return revlog.revlog.revdiff(self, rev1, rev2)
127 return revlog.revlog.revdiff(self, rev1, rev2)
128
128
129 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
129 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
130 raise NotImplementedError
130 raise NotImplementedError
131 def addgroup(self, deltas, transaction, addrevisioncb=None):
131 def addgroup(self, deltas, transaction, addrevisioncb=None):
132 raise NotImplementedError
132 raise NotImplementedError
133 def strip(self, rev, minlink):
133 def strip(self, rev, minlink):
134 raise NotImplementedError
134 raise NotImplementedError
135 def checksize(self):
135 def checksize(self):
136 raise NotImplementedError
136 raise NotImplementedError
137
137
138 class unionchangelog(unionrevlog, changelog.changelog):
138 class unionchangelog(unionrevlog, changelog.changelog):
139 def __init__(self, opener, opener2):
139 def __init__(self, opener, opener2):
140 changelog.changelog.__init__(self, opener)
140 changelog.changelog.__init__(self, opener)
141 linkmapper = None
141 linkmapper = None
142 changelog2 = changelog.changelog(opener2)
142 changelog2 = changelog.changelog(opener2)
143 unionrevlog.__init__(self, opener, self.indexfile, changelog2,
143 unionrevlog.__init__(self, opener, self.indexfile, changelog2,
144 linkmapper)
144 linkmapper)
145
145
146 def baserevision(self, nodeorrev):
146 def baserevision(self, nodeorrev):
147 # Although changelog doesn't override 'revision' method, some extensions
147 # Although changelog doesn't override 'revision' method, some extensions
148 # may replace this class with another that does. Same story with
148 # may replace this class with another that does. Same story with
149 # manifest and filelog classes.
149 # manifest and filelog classes.
150 return changelog.changelog.revision(self, nodeorrev)
150 return changelog.changelog.revision(self, nodeorrev)
151
151
152 def baserevdiff(self, rev1, rev2):
152 def baserevdiff(self, rev1, rev2):
153 return changelog.changelog.revdiff(self, rev1, rev2)
153 return changelog.changelog.revdiff(self, rev1, rev2)
154
154
155 class unionmanifest(unionrevlog, manifest.manifestrevlog):
155 class unionmanifest(unionrevlog, manifest.manifestrevlog):
156 def __init__(self, opener, opener2, linkmapper):
156 def __init__(self, opener, opener2, linkmapper):
157 manifest.manifestrevlog.__init__(self, opener)
157 manifest.manifestrevlog.__init__(self, opener)
158 manifest2 = manifest.manifestrevlog(opener2)
158 manifest2 = manifest.manifestrevlog(opener2)
159 unionrevlog.__init__(self, opener, self.indexfile, manifest2,
159 unionrevlog.__init__(self, opener, self.indexfile, manifest2,
160 linkmapper)
160 linkmapper)
161
161
162 def baserevision(self, nodeorrev):
162 def baserevision(self, nodeorrev):
163 return manifest.manifestrevlog.revision(self, nodeorrev)
163 return manifest.manifestrevlog.revision(self, nodeorrev)
164
164
165 def baserevdiff(self, rev1, rev2):
165 def baserevdiff(self, rev1, rev2):
166 return manifest.manifestrevlog.revdiff(self, rev1, rev2)
166 return manifest.manifestrevlog.revdiff(self, rev1, rev2)
167
167
168 class unionfilelog(filelog.filelog):
168 class unionfilelog(filelog.filelog):
169 def __init__(self, opener, path, opener2, linkmapper, repo):
169 def __init__(self, opener, path, opener2, linkmapper, repo):
170 filelog.filelog.__init__(self, opener, path)
170 filelog.filelog.__init__(self, opener, path)
171 filelog2 = filelog.filelog(opener2, path)
171 filelog2 = filelog.filelog(opener2, path)
172 self._revlog = unionrevlog(opener, self.indexfile,
172 self._revlog = unionrevlog(opener, self.indexfile,
173 filelog2._revlog, linkmapper)
173 filelog2._revlog, linkmapper)
174 self._repo = repo
174 self._repo = repo
175 self.repotiprev = self._revlog.repotiprev
175 self.repotiprev = self._revlog.repotiprev
176 self.revlog2 = self._revlog.revlog2
176 self.revlog2 = self._revlog.revlog2
177
177
178 def baserevision(self, nodeorrev):
178 def baserevision(self, nodeorrev):
179 return filelog.filelog.revision(self, nodeorrev)
179 return filelog.filelog.revision(self, nodeorrev)
180
180
181 def baserevdiff(self, rev1, rev2):
181 def baserevdiff(self, rev1, rev2):
182 return filelog.filelog.revdiff(self, rev1, rev2)
182 return filelog.filelog.revdiff(self, rev1, rev2)
183
183
184 def iscensored(self, rev):
184 def iscensored(self, rev):
185 """Check if a revision is censored."""
185 """Check if a revision is censored."""
186 if rev <= self.repotiprev:
186 if rev <= self.repotiprev:
187 return filelog.filelog.iscensored(self, rev)
187 return filelog.filelog.iscensored(self, rev)
188 node = self.node(rev)
188 node = self.node(rev)
189 return self.revlog2.iscensored(self.revlog2.rev(node))
189 return self.revlog2.iscensored(self.revlog2.rev(node))
190
190
191 class unionpeer(localrepo.localpeer):
191 class unionpeer(localrepo.localpeer):
192 def canpush(self):
192 def canpush(self):
193 return False
193 return False
194
194
195 class unionrepository(localrepo.localrepository):
195 class unionrepository(localrepo.localrepository):
196 def __init__(self, ui, path, path2):
196 def __init__(self, ui, path, path2):
197 localrepo.localrepository.__init__(self, ui, path)
197 localrepo.localrepository.__init__(self, ui, path)
198 self.ui.setconfig('phases', 'publish', False, 'unionrepo')
198 self.ui.setconfig('phases', 'publish', False, 'unionrepo')
199
199
200 self._url = 'union:%s+%s' % (util.expandpath(path),
200 self._url = 'union:%s+%s' % (util.expandpath(path),
201 util.expandpath(path2))
201 util.expandpath(path2))
202 self.repo2 = localrepo.localrepository(ui, path2)
202 self.repo2 = localrepo.localrepository(ui, path2)
203
203
204 @localrepo.unfilteredpropertycache
204 @localrepo.unfilteredpropertycache
205 def changelog(self):
205 def changelog(self):
206 return unionchangelog(self.svfs, self.repo2.svfs)
206 return unionchangelog(self.svfs, self.repo2.svfs)
207
207
208 def _clrev(self, rev2):
208 def _clrev(self, rev2):
209 """map from repo2 changelog rev to temporary rev in self.changelog"""
209 """map from repo2 changelog rev to temporary rev in self.changelog"""
210 node = self.repo2.changelog.node(rev2)
210 node = self.repo2.changelog.node(rev2)
211 return self.changelog.rev(node)
211 return self.changelog.rev(node)
212
212
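``_clrev`` is the ``linkmapper`` handed to ``unionmanifest`` and ``unionfilelog`` below: entries pulled from repo2 carry linkrevs in repo2's changelog numbering, and this hop through the node id rebases them onto the union changelog. A worked example with illustrative numbers:

    # Base repo has changesets 0..2; repo2 contributes two more, which
    # land at union revs 3 and 4. For a repo2 changeset at rev2 == 4:
    #   node = self.repo2.changelog.node(4)   # stable binary node id
    #   self.changelog.rev(node)              # -> 4 in union numbering
    # unionrevlog then stores that union rev as the entry's linkrev.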
213 def _constructmanifest(self):
213 def _constructmanifest(self):
214 return unionmanifest(self.svfs, self.repo2.svfs,
214 return unionmanifest(self.svfs, self.repo2.svfs,
215 self.unfiltered()._clrev)
215 self.unfiltered()._clrev)
216
216
217 def url(self):
217 def url(self):
218 return self._url
218 return self._url
219
219
220 def file(self, f):
220 def file(self, f):
221 return unionfilelog(self.svfs, f, self.repo2.svfs,
221 return unionfilelog(self.svfs, f, self.repo2.svfs,
222 self.unfiltered()._clrev, self)
222 self.unfiltered()._clrev, self)
223
223
224 def close(self):
224 def close(self):
225 self.repo2.close()
225 self.repo2.close()
226
226
227 def cancopy(self):
227 def cancopy(self):
228 return False
228 return False
229
229
230 def peer(self):
230 def peer(self):
231 return unionpeer(self)
231 return unionpeer(self)
232
232
233 def getcwd(self):
233 def getcwd(self):
234 return pycompat.getcwd() # always outside the repo
234 return pycompat.getcwd() # always outside the repo
235
235
236 def instance(ui, path, create, intents=None):
236 def instance(ui, path, create, intents=None):
237 if create:
237 if create:
238 raise error.Abort(_('cannot create new union repository'))
238 raise error.Abort(_('cannot create new union repository'))
239 parentpath = ui.config("bundle", "mainreporoot")
239 parentpath = ui.config("bundle", "mainreporoot")
240 if not parentpath:
240 if not parentpath:
241 # try to find the correct path to the working directory repo
241 # try to find the correct path to the working directory repo
242 parentpath = cmdutil.findrepo(pycompat.getcwd())
242 parentpath = cmdutil.findrepo(pycompat.getcwd())
243 if parentpath is None:
243 if parentpath is None:
244 parentpath = ''
244 parentpath = ''
245 if parentpath:
245 if parentpath:
246 # Try to make the full path relative so we get a nice, short URL.
246 # Try to make the full path relative so we get a nice, short URL.
247 # In particular, we don't want temp dir names in test outputs.
247 # In particular, we don't want temp dir names in test outputs.
248 cwd = pycompat.getcwd()
248 cwd = pycompat.getcwd()
249 if parentpath == cwd:
249 if parentpath == cwd:
250 parentpath = ''
250 parentpath = ''
251 else:
251 else:
252 cwd = pathutil.normasprefix(cwd)
252 cwd = pathutil.normasprefix(cwd)
253 if parentpath.startswith(cwd):
253 if parentpath.startswith(cwd):
254 parentpath = parentpath[len(cwd):]
254 parentpath = parentpath[len(cwd):]
255 if path.startswith('union:'):
255 if path.startswith('union:'):
256 s = path.split(":", 1)[1].split("+", 1)
256 s = path.split(":", 1)[1].split("+", 1)
257 if len(s) == 1:
257 if len(s) == 1:
258 repopath, repopath2 = parentpath, s[0]
258 repopath, repopath2 = parentpath, s[0]
259 else:
259 else:
260 repopath, repopath2 = s
260 repopath, repopath2 = s
261 else:
261 else:
262 repopath, repopath2 = parentpath, path
262 repopath, repopath2 = parentpath, path
263 return unionrepository(ui, repopath, repopath2)
263 return unionrepository(ui, repopath, repopath2)
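For completeness, the accepted path forms can be exercised directly from Python; a hedged sketch (``hg.repository`` resolves the ``union:`` scheme to this module's ``instance``, and the repository paths are illustrative):

    from mercurial import hg, ui as uimod

    ui = uimod.ui.load()
    # explicit pair: union of repo1 and repo2
    repo = hg.repository(ui, 'union:repo1+repo2')
    # single path: unions the repo found from the working directory
    # (or bundle.mainreporoot) with repo2
    repo = hg.repository(ui, 'union:repo2')
    print(len(repo))  # changesets visible across both repositories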