revlog: add revmap back to revlog.addgroup...
Durham Goode
r34292:1db9abf4 default
@@ -1,1015 +1,1014 @@
# changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import struct
import tempfile
import weakref

from .i18n import _
from .node import (
    hex,
    nullrev,
    short,
)

from . import (
    dagutil,
    error,
    mdiff,
    phases,
    pycompat,
    util,
)

_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
_CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"

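# Editor's note (illustrative, not part of the original file): these are
# fixed-size struct formats for the per-revision delta headers. The cg1
# header is four 20-byte nodes (node, p1, p2, linknode), so
# struct.calcsize("20s20s20s20s") == 80; cg2 inserts a 20-byte deltabase
# before the linknode (100 bytes); cg3 additionally appends a big-endian
# unsigned short of revlog flags (102 bytes).
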
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def getchunk(stream):
    """return the next chunk from stream as a string"""
    d = readexactly(stream, 4)
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_("invalid chunk length %d") % l)
        return ""
    return readexactly(stream, l - 4)

def chunkheader(length):
    """return a changegroup chunk header (string)"""
    return struct.pack(">l", length + 4)

def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    return struct.pack(">l", 0)

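# Editor's example (illustrative, not part of the original file): the
# length field counts its own four bytes, so a chunk carrying b'data' is
# framed as struct.pack(">l", 4 + len(b'data')) + b'data', while
# closechunk() emits b'\x00\x00\x00\x00'. Round-tripping through getchunk:
#
#     import io
#     framed = chunkheader(len(b'data')) + b'data'
#     assert getchunk(io.BytesIO(framed)) == b'data'
#     assert getchunk(io.BytesIO(closechunk())) == ""
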
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, pycompat.sysstr("wb"))
        cleanup = filename
        for c in chunks:
            fh.write(c)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)

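# Editor's usage sketch (hypothetical, not part of the original file): a
# caller would typically feed writechunks the frame iterator of an
# unpacker, e.g.
#
#     cg = cg1unpacker(fh, 'UN')
#     fname = writechunks(ui, cg.getchunks(), None)  # temp bundle file
#
# If writing fails partway, the finally block above unlinks the partial
# file via the cleanup variable.
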
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
                'deltabase': deltabase, 'delta': delta, 'flags': flags}

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block in the sshrepo case because it doesn't know the end of
        the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

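    # Editor's walkthrough (illustrative, not part of the original file):
    # for a cg1/cg2 stream (_grouplistcount == 1), getchunks() re-emits
    # frames shaped like
    #
    #     <changelog chunks...> <empty>
    #     <manifest chunks...>  <empty>
    #     [<filename> <file delta chunks...> <empty>]*  <empty>
    #
    # i.e. two sections each closed by one empty chunk, then a file list
    # whose end is the empty chunk that arrives with no entries before it.
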
    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        # We know that we'll never have more manifests than we had
        # changesets.
        self.callback = prog(_('manifests'), numchanges)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
-        deltas = self.deltaiter(revmap)
-        repo.manifestlog._revlog.addgroup(deltas, trp)
+        deltas = self.deltaiter()
+        repo.manifestlog._revlog.addgroup(deltas, revmap, trp)
        repo.ui.progress(_('manifests'), None)
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            class prog(object):
                def __init__(self, step, total):
                    self._step = step
                    self._total = total
                    self._count = 1
                def __call__(self):
                    repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                     total=self._total)
                    self._count += 1
            self.callback = prog(_('changesets'), expectedtotal)

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
-            deltas = self.deltaiter(csmap)
-            cgnodes = cl.addgroup(deltas, trp, addrevisioncb=onchangelog)
+            deltas = self.deltaiter()
+            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            repo.ui.progress(_('changesets'), None)
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            self._unpackmanifests(repo, revmap, trp, prog, changesets)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all changes
                # in the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%s incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

-    def deltaiter(self, linkmapper):
+    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            deltabase = chunkdata['deltabase']
            delta = chunkdata['delta']
            flags = chunkdata['flags']

-            link = linkmapper(cs)
            chain = node

-            yield (node, p1, p2, link, deltabase, delta, flags)
+            yield (node, p1, p2, cs, deltabase, delta, flags)

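# Editor's note (illustrative, not part of the original file): after this
# change each yielded tuple carries the raw linkrev node `cs`, and the
# linkmapper is applied inside revlog.addgroup instead. A consumer sketch,
# using the call shape shown in this diff:
#
#     deltas = unpacker.deltaiter()
#     cl.addgroup(deltas, lambda csnode: cl.rev(csnode), trp)
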
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags

class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
                                                  numchanges)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
-            deltas = self.deltaiter(revmap)
-            if not dirlog.addgroup(deltas, trp):
+            deltas = self.deltaiter()
+            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))

class headerlessfixup(object):
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)

class cg1packer(object):
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def close(self):
        return closechunk()

    def fileheader(self, fname):
        return chunkheader(len(fname)) + fname

    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, revlog, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            return dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            return sorted([revlog.rev(n) for n in nodelist])

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(len(revs) - 1):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if units is not None:
            self._progress(msgbundling, None)
        yield self.close()

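    # Editor's note (illustrative, not from the original file): group()
    # emits each revision as a delta against its predecessor in the sorted
    # list. For revs [2, 5, 9] it prepends parent(2) and yields chunks for
    # parent(2)->2, 2->5 and 5->9, then an empty chunk from close(). The
    # receiver can apply these in order because it already has all history
    # up to parent(2).
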
596 # filter any nodes that claim to be part of the known set
595 # filter any nodes that claim to be part of the known set
597 def prune(self, revlog, missing, commonrevs):
596 def prune(self, revlog, missing, commonrevs):
598 rr, rl = revlog.rev, revlog.linkrev
597 rr, rl = revlog.rev, revlog.linkrev
599 return [n for n in missing if rl(rr(n)) not in commonrevs]
598 return [n for n in missing if rl(rr(n)) not in commonrevs]
600
599
601 def _packmanifests(self, dir, mfnodes, lookuplinknode):
600 def _packmanifests(self, dir, mfnodes, lookuplinknode):
602 """Pack flat manifests into a changegroup stream."""
601 """Pack flat manifests into a changegroup stream."""
603 assert not dir
602 assert not dir
604 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
603 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
605 lookuplinknode, units=_('manifests')):
604 lookuplinknode, units=_('manifests')):
606 yield chunk
605 yield chunk
607
606
608 def _manifestsdone(self):
607 def _manifestsdone(self):
609 return ''
608 return ''
610
609
611 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
610 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
612 '''yield a sequence of changegroup chunks (strings)'''
611 '''yield a sequence of changegroup chunks (strings)'''
613 repo = self._repo
612 repo = self._repo
614 cl = repo.changelog
613 cl = repo.changelog
615
614
616 clrevorder = {}
615 clrevorder = {}
617 mfs = {} # needed manifests
616 mfs = {} # needed manifests
618 fnodes = {} # needed file nodes
617 fnodes = {} # needed file nodes
619 changedfiles = set()
618 changedfiles = set()
620
619
621 # Callback for the changelog, used to collect changed files and manifest
620 # Callback for the changelog, used to collect changed files and manifest
622 # nodes.
621 # nodes.
623 # Returns the linkrev node (identity in the changelog case).
622 # Returns the linkrev node (identity in the changelog case).
624 def lookupcl(x):
623 def lookupcl(x):
625 c = cl.read(x)
624 c = cl.read(x)
626 clrevorder[x] = len(clrevorder)
625 clrevorder[x] = len(clrevorder)
627 n = c[0]
626 n = c[0]
628 # record the first changeset introducing this manifest version
627 # record the first changeset introducing this manifest version
629 mfs.setdefault(n, x)
628 mfs.setdefault(n, x)
630 # Record a complete list of potentially-changed files in
629 # Record a complete list of potentially-changed files in
631 # this manifest.
630 # this manifest.
632 changedfiles.update(c[3])
631 changedfiles.update(c[3])
633 return x
632 return x
634
633
635 self._verbosenote(_('uncompressed size of bundle content:\n'))
634 self._verbosenote(_('uncompressed size of bundle content:\n'))
636 size = 0
635 size = 0
637 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
636 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
638 size += len(chunk)
637 size += len(chunk)
639 yield chunk
638 yield chunk
640 self._verbosenote(_('%8.i (changelog)\n') % size)
639 self._verbosenote(_('%8.i (changelog)\n') % size)
641
640
642 # We need to make sure that the linkrev in the changegroup refers to
641 # We need to make sure that the linkrev in the changegroup refers to
643 # the first changeset that introduced the manifest or file revision.
642 # the first changeset that introduced the manifest or file revision.
644 # The fastpath is usually safer than the slowpath, because the filelogs
643 # The fastpath is usually safer than the slowpath, because the filelogs
645 # are walked in revlog order.
644 # are walked in revlog order.
646 #
645 #
647 # When taking the slowpath with reorder=None and the manifest revlog
646 # When taking the slowpath with reorder=None and the manifest revlog
648 # uses generaldelta, the manifest may be walked in the "wrong" order.
647 # uses generaldelta, the manifest may be walked in the "wrong" order.
649 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
648 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
650 # cc0ff93d0c0c).
649 # cc0ff93d0c0c).
651 #
650 #
652 # When taking the fastpath, we are only vulnerable to reordering
651 # When taking the fastpath, we are only vulnerable to reordering
653 # of the changelog itself. The changelog never uses generaldelta, so
652 # of the changelog itself. The changelog never uses generaldelta, so
654 # it is only reordered when reorder=True. To handle this case, we
653 # it is only reordered when reorder=True. To handle this case, we
655 # simply take the slowpath, which already has the 'clrevorder' logic.
654 # simply take the slowpath, which already has the 'clrevorder' logic.
656 # This was also fixed in cc0ff93d0c0c.
655 # This was also fixed in cc0ff93d0c0c.
657 fastpathlinkrev = fastpathlinkrev and not self._reorder
656 fastpathlinkrev = fastpathlinkrev and not self._reorder
658 # Treemanifests don't work correctly with fastpathlinkrev
657 # Treemanifests don't work correctly with fastpathlinkrev
659 # either, because we don't discover which directory nodes to
658 # either, because we don't discover which directory nodes to
660 # send along with files. This could probably be fixed.
659 # send along with files. This could probably be fixed.
661 fastpathlinkrev = fastpathlinkrev and (
660 fastpathlinkrev = fastpathlinkrev and (
662 'treemanifest' not in repo.requirements)
661 'treemanifest' not in repo.requirements)
663
662
664 for chunk in self.generatemanifests(commonrevs, clrevorder,
663 for chunk in self.generatemanifests(commonrevs, clrevorder,
665 fastpathlinkrev, mfs, fnodes, source):
664 fastpathlinkrev, mfs, fnodes, source):
666 yield chunk
665 yield chunk
667 mfs.clear()
666 mfs.clear()
668 clrevs = set(cl.rev(x) for x in clnodes)
667 clrevs = set(cl.rev(x) for x in clnodes)
669
668
670 if not fastpathlinkrev:
669 if not fastpathlinkrev:
671 def linknodes(unused, fname):
670 def linknodes(unused, fname):
672 return fnodes.get(fname, {})
671 return fnodes.get(fname, {})
673 else:
672 else:
674 cln = cl.node
673 cln = cl.node
675 def linknodes(filerevlog, fname):
674 def linknodes(filerevlog, fname):
676 llr = filerevlog.linkrev
675 llr = filerevlog.linkrev
677 fln = filerevlog.node
676 fln = filerevlog.node
678 revs = ((r, llr(r)) for r in filerevlog)
677 revs = ((r, llr(r)) for r in filerevlog)
679 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
678 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
680
679
681 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
680 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
682 source):
681 source):
683 yield chunk
682 yield chunk
684
683
685 yield self.close()
684 yield self.close()
686
685
687 if clnodes:
686 if clnodes:
688 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
687 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
689
688
690 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
689 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
691 fnodes, source):
690 fnodes, source):
692 """Returns an iterator of changegroup chunks containing manifests.
691 """Returns an iterator of changegroup chunks containing manifests.
693
692
694 `source` is unused here, but is used by extensions like remotefilelog to
693 `source` is unused here, but is used by extensions like remotefilelog to
695 change what is sent based in pulls vs pushes, etc.
694 change what is sent based in pulls vs pushes, etc.
696 """
695 """
697 repo = self._repo
696 repo = self._repo
698 mfl = repo.manifestlog
697 mfl = repo.manifestlog
699 dirlog = mfl._revlog.dirlog
698 dirlog = mfl._revlog.dirlog
700 tmfnodes = {'': mfs}
699 tmfnodes = {'': mfs}
701
700
702 # Callback for the manifest, used to collect linkrevs for filelog
701 # Callback for the manifest, used to collect linkrevs for filelog
703 # revisions.
702 # revisions.
704 # Returns the linkrev node (collected in lookupcl).
703 # Returns the linkrev node (collected in lookupcl).
705 def makelookupmflinknode(dir):
704 def makelookupmflinknode(dir):
706 if fastpathlinkrev:
705 if fastpathlinkrev:
707 assert not dir
706 assert not dir
708 return mfs.__getitem__
707 return mfs.__getitem__
709
708
710 def lookupmflinknode(x):
709 def lookupmflinknode(x):
711 """Callback for looking up the linknode for manifests.
710 """Callback for looking up the linknode for manifests.
712
711
713 Returns the linkrev node for the specified manifest.
712 Returns the linkrev node for the specified manifest.
714
713
715 SIDE EFFECT:
714 SIDE EFFECT:
716
715
717 1) fclnodes gets populated with the list of relevant
716 1) fclnodes gets populated with the list of relevant
718 file nodes if we're not using fastpathlinkrev
717 file nodes if we're not using fastpathlinkrev
719 2) When treemanifests are in use, collects treemanifest nodes
718 2) When treemanifests are in use, collects treemanifest nodes
720 to send
719 to send
721
720
722 Note that this means manifests must be completely sent to
721 Note that this means manifests must be completely sent to
723 the client before you can trust the list of files and
722 the client before you can trust the list of files and
724 treemanifests to send.
723 treemanifests to send.
725 """
724 """
726 clnode = tmfnodes[dir][x]
725 clnode = tmfnodes[dir][x]
727 mdata = mfl.get(dir, x).readfast(shallow=True)
726 mdata = mfl.get(dir, x).readfast(shallow=True)
728 for p, n, fl in mdata.iterentries():
727 for p, n, fl in mdata.iterentries():
729 if fl == 't': # subdirectory manifest
728 if fl == 't': # subdirectory manifest
730 subdir = dir + p + '/'
729 subdir = dir + p + '/'
731 tmfclnodes = tmfnodes.setdefault(subdir, {})
730 tmfclnodes = tmfnodes.setdefault(subdir, {})
732 tmfclnode = tmfclnodes.setdefault(n, clnode)
731 tmfclnode = tmfclnodes.setdefault(n, clnode)
733 if clrevorder[clnode] < clrevorder[tmfclnode]:
732 if clrevorder[clnode] < clrevorder[tmfclnode]:
734 tmfclnodes[n] = clnode
733 tmfclnodes[n] = clnode
735 else:
734 else:
736 f = dir + p
735 f = dir + p
737 fclnodes = fnodes.setdefault(f, {})
736 fclnodes = fnodes.setdefault(f, {})
738 fclnode = fclnodes.setdefault(n, clnode)
737 fclnode = fclnodes.setdefault(n, clnode)
739 if clrevorder[clnode] < clrevorder[fclnode]:
738 if clrevorder[clnode] < clrevorder[fclnode]:
740 fclnodes[n] = clnode
739 fclnodes[n] = clnode
741 return clnode
740 return clnode
742 return lookupmflinknode
741 return lookupmflinknode
743
742
744 size = 0
743 size = 0
745 while tmfnodes:
744 while tmfnodes:
746 dir = min(tmfnodes)
745 dir = min(tmfnodes)
747 nodes = tmfnodes[dir]
746 nodes = tmfnodes[dir]
748 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
747 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
749 if not dir or prunednodes:
748 if not dir or prunednodes:
750 for x in self._packmanifests(dir, prunednodes,
749 for x in self._packmanifests(dir, prunednodes,
751 makelookupmflinknode(dir)):
750 makelookupmflinknode(dir)):
752 size += len(x)
751 size += len(x)
753 yield x
752 yield x
754 del tmfnodes[dir]
753 del tmfnodes[dir]
755 self._verbosenote(_('%8.i (manifests)\n') % size)
754 self._verbosenote(_('%8.i (manifests)\n') % size)
756 yield self._manifestsdone()
755 yield self._manifestsdone()
757
756
758 # The 'source' parameter is useful for extensions
757 # The 'source' parameter is useful for extensions
759 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
758 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
760 repo = self._repo
759 repo = self._repo
761 progress = self._progress
760 progress = self._progress
762 msgbundling = _('bundling')
761 msgbundling = _('bundling')
763
762
764 total = len(changedfiles)
763 total = len(changedfiles)
765 # for progress output
764 # for progress output
766 msgfiles = _('files')
765 msgfiles = _('files')
767 for i, fname in enumerate(sorted(changedfiles)):
766 for i, fname in enumerate(sorted(changedfiles)):
768 filerevlog = repo.file(fname)
767 filerevlog = repo.file(fname)
769 if not filerevlog:
768 if not filerevlog:
770 raise error.Abort(_("empty or missing revlog for %s") % fname)
769 raise error.Abort(_("empty or missing revlog for %s") % fname)
771
770
772 linkrevnodes = linknodes(filerevlog, fname)
771 linkrevnodes = linknodes(filerevlog, fname)
773 # Lookup for filenodes, we collected the linkrev nodes above in the
772 # Lookup for filenodes, we collected the linkrev nodes above in the
774 # fastpath case and with lookupmf in the slowpath case.
773 # fastpath case and with lookupmf in the slowpath case.
775 def lookupfilelog(x):
774 def lookupfilelog(x):
776 return linkrevnodes[x]
775 return linkrevnodes[x]
777
776
778 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
777 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
779 if filenodes:
778 if filenodes:
780 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
779 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
781 total=total)
780 total=total)
782 h = self.fileheader(fname)
781 h = self.fileheader(fname)
783 size = len(h)
782 size = len(h)
784 yield h
783 yield h
785 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
784 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
786 size += len(chunk)
785 size += len(chunk)
787 yield chunk
786 yield chunk
788 self._verbosenote(_('%8.i %s\n') % (size, fname))
787 self._verbosenote(_('%8.i %s\n') % (size, fname))
789 progress(msgbundling, None)
788 progress(msgbundling, None)
790
789
791 def deltaparent(self, revlog, rev, p1, p2, prev):
790 def deltaparent(self, revlog, rev, p1, p2, prev):
792 return prev
791 return prev
793
792
794 def revchunk(self, revlog, rev, prev, linknode):
793 def revchunk(self, revlog, rev, prev, linknode):
795 node = revlog.node(rev)
794 node = revlog.node(rev)
796 p1, p2 = revlog.parentrevs(rev)
795 p1, p2 = revlog.parentrevs(rev)
797 base = self.deltaparent(revlog, rev, p1, p2, prev)
796 base = self.deltaparent(revlog, rev, p1, p2, prev)
798
797
799 prefix = ''
798 prefix = ''
800 if revlog.iscensored(base) or revlog.iscensored(rev):
799 if revlog.iscensored(base) or revlog.iscensored(rev):
801 try:
800 try:
802 delta = revlog.revision(node, raw=True)
801 delta = revlog.revision(node, raw=True)
803 except error.CensoredNodeError as e:
802 except error.CensoredNodeError as e:
804 delta = e.tombstone
803 delta = e.tombstone
805 if base == nullrev:
804 if base == nullrev:
806 prefix = mdiff.trivialdiffheader(len(delta))
805 prefix = mdiff.trivialdiffheader(len(delta))
807 else:
806 else:
808 baselen = revlog.rawsize(base)
807 baselen = revlog.rawsize(base)
809 prefix = mdiff.replacediffheader(baselen, len(delta))
808 prefix = mdiff.replacediffheader(baselen, len(delta))
810 elif base == nullrev:
809 elif base == nullrev:
811 delta = revlog.revision(node, raw=True)
810 delta = revlog.revision(node, raw=True)
812 prefix = mdiff.trivialdiffheader(len(delta))
811 prefix = mdiff.trivialdiffheader(len(delta))
813 else:
812 else:
814 delta = revlog.revdiff(base, rev)
813 delta = revlog.revdiff(base, rev)
815 p1n, p2n = revlog.parents(node)
814 p1n, p2n = revlog.parents(node)
816 basenode = revlog.node(base)
815 basenode = revlog.node(base)
817 flags = revlog.flags(rev)
816 flags = revlog.flags(rev)
818 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
817 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
819 meta += prefix
818 meta += prefix
820 l = len(meta) + len(delta)
819 l = len(meta) + len(delta)
821 yield chunkheader(l)
820 yield chunkheader(l)
822 yield meta
821 yield meta
823 yield delta
822 yield delta
824 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
823 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
825 # do nothing with basenode, it is implicitly the previous one in HG10
824 # do nothing with basenode, it is implicitly the previous one in HG10
826 # do nothing with flags, it is implicitly 0 for cg1 and cg2
825 # do nothing with flags, it is implicitly 0 for cg1 and cg2
827 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
826 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
828
827
829 class cg2packer(cg1packer):
828 class cg2packer(cg1packer):
830 version = '02'
829 version = '02'
831 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
830 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
832
831
833 def __init__(self, repo, bundlecaps=None):
832 def __init__(self, repo, bundlecaps=None):
834 super(cg2packer, self).__init__(repo, bundlecaps)
833 super(cg2packer, self).__init__(repo, bundlecaps)
835 if self._reorder is None:
834 if self._reorder is None:
836 # Since generaldelta is directly supported by cg2, reordering
835 # Since generaldelta is directly supported by cg2, reordering
837 # generally doesn't help, so we disable it by default (treating
836 # generally doesn't help, so we disable it by default (treating
838 # bundle.reorder=auto just like bundle.reorder=False).
837 # bundle.reorder=auto just like bundle.reorder=False).
839 self._reorder = False
838 self._reorder = False
840
839
841 def deltaparent(self, revlog, rev, p1, p2, prev):
840 def deltaparent(self, revlog, rev, p1, p2, prev):
842 dp = revlog.deltaparent(rev)
841 dp = revlog.deltaparent(rev)
843 if dp == nullrev and revlog.storedeltachains:
842 if dp == nullrev and revlog.storedeltachains:
844 # Avoid sending full revisions when delta parent is null. Pick prev
843 # Avoid sending full revisions when delta parent is null. Pick prev
845 # in that case. It's tempting to pick p1 in this case, as p1 will
844 # in that case. It's tempting to pick p1 in this case, as p1 will
846 # be smaller in the common case. However, computing a delta against
845 # be smaller in the common case. However, computing a delta against
847 # p1 may require resolving the raw text of p1, which could be
846 # p1 may require resolving the raw text of p1, which could be
848 # expensive. The revlog caches should have prev cached, meaning
847 # expensive. The revlog caches should have prev cached, meaning
849 # less CPU for changegroup generation. There is likely room to add
848 # less CPU for changegroup generation. There is likely room to add
850 # a flag and/or config option to control this behavior.
849 # a flag and/or config option to control this behavior.
851 return prev
850 return prev
852 elif dp == nullrev:
851 elif dp == nullrev:
853 # revlog is configured to use full snapshot for a reason,
852 # revlog is configured to use full snapshot for a reason,
854 # stick to full snapshot.
853 # stick to full snapshot.
855 return nullrev
854 return nullrev
856 elif dp not in (p1, p2, prev):
855 elif dp not in (p1, p2, prev):
857 # Pick prev when we can't be sure remote has the base revision.
856 # Pick prev when we can't be sure remote has the base revision.
858 return prev
857 return prev
859 else:
858 else:
860 return dp
859 return dp
861
860
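# A minimal standalone sketch of the delta-parent policy above, with plain
# integers standing in for revs and `storedeltachains` passed explicitly
# (an illustration of the branch logic, not the revlog API itself):
def _choosedeltaparent(dp, p1, p2, prev, storedeltachains, nullrev=-1):
    if dp == nullrev and storedeltachains:
        return prev      # prev is usually cached, so deltaing is cheap
    elif dp == nullrev:
        return nullrev   # the revlog wanted a full snapshot; keep it
    elif dp not in (p1, p2, prev):
        return prev      # remote may not have the stored delta base
    else:
        return dp        # stored delta parent is safe to reuse
assert _choosedeltaparent(-1, 2, 3, 4, True) == 4    # null base -> prev
assert _choosedeltaparent(-1, 2, 3, 4, False) == -1  # keep full snapshot
assert _choosedeltaparent(1, 2, 3, 4, True) == 4     # unknown base -> prev
assert _choosedeltaparent(3, 2, 3, 4, True) == 3     # parent base reused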
862 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
861 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
863 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
862 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
864 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
863 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
865
864
866 class cg3packer(cg2packer):
865 class cg3packer(cg2packer):
867 version = '03'
866 version = '03'
868 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
867 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
869
868
870 def _packmanifests(self, dir, mfnodes, lookuplinknode):
869 def _packmanifests(self, dir, mfnodes, lookuplinknode):
871 if dir:
870 if dir:
872 yield self.fileheader(dir)
871 yield self.fileheader(dir)
873
872
874 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
873 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
875 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
874 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
876 units=_('manifests')):
875 units=_('manifests')):
877 yield chunk
876 yield chunk
878
877
879 def _manifestsdone(self):
878 def _manifestsdone(self):
880 return self.close()
879 return self.close()
881
880
882 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
881 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
883 return struct.pack(
882 return struct.pack(
884 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
883 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
885
884
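# The three packers differ mainly in their fixed delta-header layout; a
# quick struct.calcsize() check (format strings mirroring the
# _CHANGEGROUPV*_DELTA_HEADER constants) shows the on-the-wire sizes:
import struct
assert struct.calcsize("20s20s20s20s") == 80        # cg1: node, p1, p2, linknode
assert struct.calcsize("20s20s20s20s20s") == 100    # cg2: adds explicit basenode
assert struct.calcsize(">20s20s20s20s20sH") == 102  # cg3: adds 16-bit flags field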
886 _packermap = {'01': (cg1packer, cg1unpacker),
885 _packermap = {'01': (cg1packer, cg1unpacker),
887 # cg2 adds support for exchanging generaldelta
886 # cg2 adds support for exchanging generaldelta
888 '02': (cg2packer, cg2unpacker),
887 '02': (cg2packer, cg2unpacker),
889 # cg3 adds support for exchanging revlog flags and treemanifests
888 # cg3 adds support for exchanging revlog flags and treemanifests
890 '03': (cg3packer, cg3unpacker),
889 '03': (cg3packer, cg3unpacker),
891 }
890 }
892
891
893 def allsupportedversions(repo):
892 def allsupportedversions(repo):
894 versions = set(_packermap.keys())
893 versions = set(_packermap.keys())
895 if not (repo.ui.configbool('experimental', 'changegroup3') or
894 if not (repo.ui.configbool('experimental', 'changegroup3') or
896 repo.ui.configbool('experimental', 'treemanifest') or
895 repo.ui.configbool('experimental', 'treemanifest') or
897 'treemanifest' in repo.requirements):
896 'treemanifest' in repo.requirements):
898 versions.discard('03')
897 versions.discard('03')
899 return versions
898 return versions
900
899
901 # Changegroup versions that can be applied to the repo
900 # Changegroup versions that can be applied to the repo
902 def supportedincomingversions(repo):
901 def supportedincomingversions(repo):
903 return allsupportedversions(repo)
902 return allsupportedversions(repo)
904
903
905 # Changegroup versions that can be created from the repo
904 # Changegroup versions that can be created from the repo
906 def supportedoutgoingversions(repo):
905 def supportedoutgoingversions(repo):
907 versions = allsupportedversions(repo)
906 versions = allsupportedversions(repo)
908 if 'treemanifest' in repo.requirements:
907 if 'treemanifest' in repo.requirements:
909 # Versions 01 and 02 support only flat manifests and it's just too
908 # Versions 01 and 02 support only flat manifests and it's just too
910 # expensive to convert between the flat manifest and tree manifest on
909 # expensive to convert between the flat manifest and tree manifest on
911 # the fly. Since tree manifests are hashed differently, all of history
910 # the fly. Since tree manifests are hashed differently, all of history
912 # would have to be converted. Instead, we simply don't even pretend to
911 # would have to be converted. Instead, we simply don't even pretend to
913 # support versions 01 and 02.
912 # support versions 01 and 02.
914 versions.discard('01')
913 versions.discard('01')
915 versions.discard('02')
914 versions.discard('02')
916 return versions
915 return versions
917
916
918 def localversion(repo):
917 def localversion(repo):
919 # Finds the best version to use for bundles that are meant to be used
918 # Finds the best version to use for bundles that are meant to be used
920 # locally, such as those from strip and shelve, and temporary bundles.
919 # locally, such as those from strip and shelve, and temporary bundles.
921 return max(supportedoutgoingversions(repo))
920 return max(supportedoutgoingversions(repo))
922
921
923 def safeversion(repo):
922 def safeversion(repo):
924 # Finds the smallest version that it's safe to assume clients of the repo
923 # Finds the smallest version that it's safe to assume clients of the repo
925 # will support. For example, all hg versions that support generaldelta also
924 # will support. For example, all hg versions that support generaldelta also
926 # support changegroup 02.
925 # support changegroup 02.
927 versions = supportedoutgoingversions(repo)
926 versions = supportedoutgoingversions(repo)
928 if 'generaldelta' in repo.requirements:
927 if 'generaldelta' in repo.requirements:
929 versions.discard('01')
928 versions.discard('01')
930 assert versions
929 assert versions
931 return min(versions)
930 return min(versions)
932
931
933 def getbundler(version, repo, bundlecaps=None):
932 def getbundler(version, repo, bundlecaps=None):
934 assert version in supportedoutgoingversions(repo)
933 assert version in supportedoutgoingversions(repo)
935 return _packermap[version][0](repo, bundlecaps)
934 return _packermap[version][0](repo, bundlecaps)
936
935
937 def getunbundler(version, fh, alg, extras=None):
936 def getunbundler(version, fh, alg, extras=None):
938 return _packermap[version][1](fh, alg, extras=extras)
937 return _packermap[version][1](fh, alg, extras=extras)
939
938
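# A toy run of the version-selection logic above, with plain sets standing
# in for a real repo (safeversion() performs the equivalent steps against
# supportedoutgoingversions()):
supported = {'01', '02'}          # '03' needs experimental.changegroup3
requirements = {'revlogv1', 'generaldelta'}   # stub for repo.requirements
if 'generaldelta' in requirements:
    supported.discard('01')       # generaldelta peers understand cg2
assert min(supported) == '02'     # what safeversion() would return
assert max(supported) == '02'     # what localversion() would return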
940 def _changegroupinfo(repo, nodes, source):
939 def _changegroupinfo(repo, nodes, source):
941 if repo.ui.verbose or source == 'bundle':
940 if repo.ui.verbose or source == 'bundle':
942 repo.ui.status(_("%d changesets found\n") % len(nodes))
941 repo.ui.status(_("%d changesets found\n") % len(nodes))
943 if repo.ui.debugflag:
942 if repo.ui.debugflag:
944 repo.ui.debug("list of changesets:\n")
943 repo.ui.debug("list of changesets:\n")
945 for node in nodes:
944 for node in nodes:
946 repo.ui.debug("%s\n" % hex(node))
945 repo.ui.debug("%s\n" % hex(node))
947
946
948 def makechangegroup(repo, outgoing, version, source, fastpath=False,
947 def makechangegroup(repo, outgoing, version, source, fastpath=False,
949 bundlecaps=None):
948 bundlecaps=None):
950 cgstream = makestream(repo, outgoing, version, source,
949 cgstream = makestream(repo, outgoing, version, source,
951 fastpath=fastpath, bundlecaps=bundlecaps)
950 fastpath=fastpath, bundlecaps=bundlecaps)
952 return getunbundler(version, util.chunkbuffer(cgstream), None,
951 return getunbundler(version, util.chunkbuffer(cgstream), None,
953 {'clcount': len(outgoing.missing) })
952 {'clcount': len(outgoing.missing) })
954
953
955 def makestream(repo, outgoing, version, source, fastpath=False,
954 def makestream(repo, outgoing, version, source, fastpath=False,
956 bundlecaps=None):
955 bundlecaps=None):
957 bundler = getbundler(version, repo, bundlecaps=bundlecaps)
956 bundler = getbundler(version, repo, bundlecaps=bundlecaps)
958
957
959 repo = repo.unfiltered()
958 repo = repo.unfiltered()
960 commonrevs = outgoing.common
959 commonrevs = outgoing.common
961 csets = outgoing.missing
960 csets = outgoing.missing
962 heads = outgoing.missingheads
961 heads = outgoing.missingheads
963 # We go through the fast path if we get told to, or if all (unfiltered)
962 # We go through the fast path if we get told to, or if all (unfiltered)
964 # heads have been requested (since we then know that all linkrevs will
963 # heads have been requested (since we then know that all linkrevs will
965 # be pulled by the client).
964 # be pulled by the client).
966 heads.sort()
965 heads.sort()
967 fastpathlinkrev = fastpath or (
966 fastpathlinkrev = fastpath or (
968 repo.filtername is None and heads == sorted(repo.heads()))
967 repo.filtername is None and heads == sorted(repo.heads()))
969
968
970 repo.hook('preoutgoing', throw=True, source=source)
969 repo.hook('preoutgoing', throw=True, source=source)
971 _changegroupinfo(repo, csets, source)
970 _changegroupinfo(repo, csets, source)
972 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
971 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
973
972
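# Hedged usage note for the two entry points above: makestream() yields raw
# changegroup chunks (e.g. for writing straight into a bundle), while
# makechangegroup() wraps that same stream in an unbundler so it can be
# applied directly. With `repo` and `outgoing` coming from the usual
# discovery step, a caller might write:
#
#     cg = makechangegroup(repo, outgoing, '02', 'push')
#     chunks = makestream(repo, outgoing, '02', 'push')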
974 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
973 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
975 revisions = 0
974 revisions = 0
976 files = 0
975 files = 0
977 for chunkdata in iter(source.filelogheader, {}):
976 for chunkdata in iter(source.filelogheader, {}):
978 files += 1
977 files += 1
979 f = chunkdata["filename"]
978 f = chunkdata["filename"]
980 repo.ui.debug("adding %s revisions\n" % f)
979 repo.ui.debug("adding %s revisions\n" % f)
981 repo.ui.progress(_('files'), files, unit=_('files'),
980 repo.ui.progress(_('files'), files, unit=_('files'),
982 total=expectedfiles)
981 total=expectedfiles)
983 fl = repo.file(f)
982 fl = repo.file(f)
984 o = len(fl)
983 o = len(fl)
985 try:
984 try:
986 deltas = source.deltaiter(revmap)
985 deltas = source.deltaiter()
987 if not fl.addgroup(deltas, trp):
986 if not fl.addgroup(deltas, revmap, trp):
988 raise error.Abort(_("received file revlog group is empty"))
987 raise error.Abort(_("received file revlog group is empty"))
989 except error.CensoredBaseError as e:
988 except error.CensoredBaseError as e:
990 raise error.Abort(_("received delta base is censored: %s") % e)
989 raise error.Abort(_("received delta base is censored: %s") % e)
991 revisions += len(fl) - o
990 revisions += len(fl) - o
992 if f in needfiles:
991 if f in needfiles:
993 needs = needfiles[f]
992 needs = needfiles[f]
994 for new in xrange(o, len(fl)):
993 for new in xrange(o, len(fl)):
995 n = fl.node(new)
994 n = fl.node(new)
996 if n in needs:
995 if n in needs:
997 needs.remove(n)
996 needs.remove(n)
998 else:
997 else:
999 raise error.Abort(
998 raise error.Abort(
1000 _("received spurious file revlog entry"))
999 _("received spurious file revlog entry"))
1001 if not needs:
1000 if not needs:
1002 del needfiles[f]
1001 del needfiles[f]
1003 repo.ui.progress(_('files'), None)
1002 repo.ui.progress(_('files'), None)
1004
1003
1005 for f, needs in needfiles.iteritems():
1004 for f, needs in needfiles.iteritems():
1006 fl = repo.file(f)
1005 fl = repo.file(f)
1007 for n in needs:
1006 for n in needs:
1008 try:
1007 try:
1009 fl.rev(n)
1008 fl.rev(n)
1010 except error.LookupError:
1009 except error.LookupError:
1011 raise error.Abort(
1010 raise error.Abort(
1012 _('missing file data for %s:%s - run hg verify') %
1011 _('missing file data for %s:%s - run hg verify') %
1013 (f, hex(n)))
1012 (f, hex(n)))
1014
1013
1015 return revisions, files
1014 return revisions, files
@@ -1,2252 +1,2253 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 wdirhex,
30 wdirhex,
31 wdirid,
31 wdirid,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from . import (
35 from . import (
36 ancestor,
36 ancestor,
37 error,
37 error,
38 mdiff,
38 mdiff,
39 policy,
39 policy,
40 pycompat,
40 pycompat,
41 templatefilters,
41 templatefilters,
42 util,
42 util,
43 )
43 )
44
44
45 parsers = policy.importmod(r'parsers')
45 parsers = policy.importmod(r'parsers')
46
46
47 # Aliased for performance.
47 # Aliased for performance.
48 _zlibdecompress = zlib.decompress
48 _zlibdecompress = zlib.decompress
49
49
50 # revlog header flags
50 # revlog header flags
51 REVLOGV0 = 0
51 REVLOGV0 = 0
52 REVLOGV1 = 1
52 REVLOGV1 = 1
53 # Dummy value until file format is finalized.
53 # Dummy value until file format is finalized.
54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
55 REVLOGV2 = 0xDEAD
55 REVLOGV2 = 0xDEAD
56 FLAG_INLINE_DATA = (1 << 16)
56 FLAG_INLINE_DATA = (1 << 16)
57 FLAG_GENERALDELTA = (1 << 17)
57 FLAG_GENERALDELTA = (1 << 17)
58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
59 REVLOG_DEFAULT_FORMAT = REVLOGV1
59 REVLOG_DEFAULT_FORMAT = REVLOGV1
60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
63
63
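# Worked decomposition of a header word built from the constants above:
# the low 16 bits carry the format number and the high bits carry feature
# flags (the same masks revlog.__init__ applies when parsing the header):
v = REVLOG_DEFAULT_VERSION              # REVLOGV1 | FLAG_INLINE_DATA
assert v & 0xFFFF == REVLOGV1           # fmt
assert v & ~0xFFFF == FLAG_INLINE_DATA  # flags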
64 # revlog index flags
64 # revlog index flags
65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
68 REVIDX_DEFAULT_FLAGS = 0
68 REVIDX_DEFAULT_FLAGS = 0
69 # stable order in which flags need to be processed and their processors applied
69 # stable order in which flags need to be processed and their processors applied
70 REVIDX_FLAGS_ORDER = [
70 REVIDX_FLAGS_ORDER = [
71 REVIDX_ISCENSORED,
71 REVIDX_ISCENSORED,
72 REVIDX_ELLIPSIS,
72 REVIDX_ELLIPSIS,
73 REVIDX_EXTSTORED,
73 REVIDX_EXTSTORED,
74 ]
74 ]
75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
76
76
77 # max size of revlog with inline data
77 # max size of revlog with inline data
78 _maxinline = 131072
78 _maxinline = 131072
79 _chunksize = 1048576
79 _chunksize = 1048576
80
80
81 RevlogError = error.RevlogError
81 RevlogError = error.RevlogError
82 LookupError = error.LookupError
82 LookupError = error.LookupError
83 CensoredNodeError = error.CensoredNodeError
83 CensoredNodeError = error.CensoredNodeError
84 ProgrammingError = error.ProgrammingError
84 ProgrammingError = error.ProgrammingError
85
85
86 # Store flag processors (cf. 'addflagprocessor()' to register)
86 # Store flag processors (cf. 'addflagprocessor()' to register)
87 _flagprocessors = {
87 _flagprocessors = {
88 REVIDX_ISCENSORED: None,
88 REVIDX_ISCENSORED: None,
89 }
89 }
90
90
91 def addflagprocessor(flag, processor):
91 def addflagprocessor(flag, processor):
92 """Register a flag processor on a revision data flag.
92 """Register a flag processor on a revision data flag.
93
93
94 Invariant:
94 Invariant:
95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
96 - Only one flag processor can be registered on a specific flag.
96 - Only one flag processor can be registered on a specific flag.
97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
98 following signatures:
98 following signatures:
99 - (read) f(self, rawtext) -> text, bool
99 - (read) f(self, rawtext) -> text, bool
100 - (write) f(self, text) -> rawtext, bool
100 - (write) f(self, text) -> rawtext, bool
101 - (raw) f(self, rawtext) -> bool
101 - (raw) f(self, rawtext) -> bool
102 "text" is presented to the user. "rawtext" is stored in revlog data, not
102 "text" is presented to the user. "rawtext" is stored in revlog data, not
103 directly visible to the user.
103 directly visible to the user.
104 The boolean returned by these transforms is used to determine whether
104 The boolean returned by these transforms is used to determine whether
105 the returned text can be used for hash integrity checking. For example,
105 the returned text can be used for hash integrity checking. For example,
106 if "write" returns False, then "text" is used to generate hash. If
106 if "write" returns False, then "text" is used to generate hash. If
107 "write" returns True, that basically means "rawtext" returned by "write"
107 "write" returns True, that basically means "rawtext" returned by "write"
108 should be used to generate hash. Usually, "write" and "read" return
108 should be used to generate hash. Usually, "write" and "read" return
109 different booleans. And "raw" returns a same boolean as "write".
109 different booleans. And "raw" returns a same boolean as "write".
110
110
111 Note: The 'raw' transform is used for changegroup generation and in some
111 Note: The 'raw' transform is used for changegroup generation and in some
112 debug commands. In this case the transform only indicates whether the
112 debug commands. In this case the transform only indicates whether the
113 contents can be used for hash integrity checks.
113 contents can be used for hash integrity checks.
114 """
114 """
115 if not flag & REVIDX_KNOWN_FLAGS:
115 if not flag & REVIDX_KNOWN_FLAGS:
116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
117 raise ProgrammingError(msg)
117 raise ProgrammingError(msg)
118 if flag not in REVIDX_FLAGS_ORDER:
118 if flag not in REVIDX_FLAGS_ORDER:
119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
120 raise ProgrammingError(msg)
120 raise ProgrammingError(msg)
121 if flag in _flagprocessors:
121 if flag in _flagprocessors:
122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
123 raise error.Abort(msg)
123 raise error.Abort(msg)
124 _flagprocessors[flag] = processor
124 _flagprocessors[flag] = processor
125
125
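# A hedged sketch of the registration contract above; the three transforms
# are illustrative identity functions, not a real extension (registration
# happens exactly once, at extension setup time):
def _read(self, rawtext):
    return rawtext, True    # returned text is usable for hash checks
def _write(self, text):
    return text, True       # returned rawtext is usable for hash checks
def _raw(self, rawtext):
    return True             # rawtext is usable for hash integrity checks
# addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))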
126 def getoffset(q):
126 def getoffset(q):
127 return int(q >> 16)
127 return int(q >> 16)
128
128
129 def gettype(q):
129 def gettype(q):
130 return int(q & 0xFFFF)
130 return int(q & 0xFFFF)
131
131
132 def offset_type(offset, type):
132 def offset_type(offset, type):
133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
134 raise ValueError('unknown revlog index flags')
134 raise ValueError('unknown revlog index flags')
135 return int(int(offset) << 16 | type)
135 return int(int(offset) << 16 | type)
136
136
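# Worked example of the packing above: the byte offset lives in the high
# bits of the first index field, the revlog index flags in the low 16:
plain = offset_type(1024, 0)
assert getoffset(plain) == 1024 and gettype(plain) == 0
censored = offset_type(1024, REVIDX_ISCENSORED)
assert getoffset(censored) == 1024
assert gettype(censored) == REVIDX_ISCENSORED == (1 << 15)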
137 _nullhash = hashlib.sha1(nullid)
137 _nullhash = hashlib.sha1(nullid)
138
138
139 def hash(text, p1, p2):
139 def hash(text, p1, p2):
140 """generate a hash from the given text and its parent hashes
140 """generate a hash from the given text and its parent hashes
141
141
142 This hash combines both the current file contents and its history
142 This hash combines both the current file contents and its history
143 in a manner that makes it easy to distinguish nodes with the same
143 in a manner that makes it easy to distinguish nodes with the same
144 content in the revision graph.
144 content in the revision graph.
145 """
145 """
146 # As of now, if one of the parent nodes is null, p2 is null
146 # As of now, if one of the parent nodes is null, p2 is null
147 if p2 == nullid:
147 if p2 == nullid:
148 # deep copy of a hash is faster than creating one
148 # deep copy of a hash is faster than creating one
149 s = _nullhash.copy()
149 s = _nullhash.copy()
150 s.update(p1)
150 s.update(p1)
151 else:
151 else:
152 # none of the parent nodes are nullid
152 # none of the parent nodes are nullid
153 if p1 < p2:
153 if p1 < p2:
154 a = p1
154 a = p1
155 b = p2
155 b = p2
156 else:
156 else:
157 a = p2
157 a = p2
158 b = p1
158 b = p1
159 s = hashlib.sha1(a)
159 s = hashlib.sha1(a)
160 s.update(b)
160 s.update(b)
161 s.update(text)
161 s.update(text)
162 return s.digest()
162 return s.digest()
163
163
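# Quick check of the rules above: parents are ordered before hashing, so a
# node id does not depend on parent order (toy 20-byte ids, not real nodes;
# `hash` is the module-level function defined above, not the builtin):
p1, p2 = b'\x01' * 20, b'\x02' * 20
assert hash(b'text', p1, p2) == hash(b'text', p2, p1)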
164 # index v0:
164 # index v0:
165 # 4 bytes: offset
165 # 4 bytes: offset
166 # 4 bytes: compressed length
166 # 4 bytes: compressed length
167 # 4 bytes: base rev
167 # 4 bytes: base rev
168 # 4 bytes: link rev
168 # 4 bytes: link rev
169 # 20 bytes: parent 1 nodeid
169 # 20 bytes: parent 1 nodeid
170 # 20 bytes: parent 2 nodeid
170 # 20 bytes: parent 2 nodeid
171 # 20 bytes: nodeid
171 # 20 bytes: nodeid
172 indexformatv0 = struct.Struct(">4l20s20s20s")
172 indexformatv0 = struct.Struct(">4l20s20s20s")
173 indexformatv0_pack = indexformatv0.pack
173 indexformatv0_pack = indexformatv0.pack
174 indexformatv0_unpack = indexformatv0.unpack
174 indexformatv0_unpack = indexformatv0.unpack
175
175
176 class revlogoldio(object):
176 class revlogoldio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = indexformatv0.size
178 self.size = indexformatv0.size
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 s = self.size
181 s = self.size
182 index = []
182 index = []
183 nodemap = {nullid: nullrev}
183 nodemap = {nullid: nullrev}
184 n = off = 0
184 n = off = 0
185 l = len(data)
185 l = len(data)
186 while off + s <= l:
186 while off + s <= l:
187 cur = data[off:off + s]
187 cur = data[off:off + s]
188 off += s
188 off += s
189 e = indexformatv0_unpack(cur)
189 e = indexformatv0_unpack(cur)
190 # transform to revlogv1 format
190 # transform to revlogv1 format
191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
193 index.append(e2)
193 index.append(e2)
194 nodemap[e[6]] = n
194 nodemap[e[6]] = n
195 n += 1
195 n += 1
196
196
197 # add the magic null revision at -1
197 # add the magic null revision at -1
198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
199
199
200 return index, nodemap, None
200 return index, nodemap, None
201
201
202 def packentry(self, entry, node, version, rev):
202 def packentry(self, entry, node, version, rev):
203 if gettype(entry[0]):
203 if gettype(entry[0]):
204 raise RevlogError(_('index entry flags need revlog version 1'))
204 raise RevlogError(_('index entry flags need revlog version 1'))
205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
206 node(entry[5]), node(entry[6]), entry[7])
206 node(entry[5]), node(entry[6]), entry[7])
207 return indexformatv0_pack(*e2)
207 return indexformatv0_pack(*e2)
208
208
209 # index ng:
209 # index ng:
210 # 6 bytes: offset
210 # 6 bytes: offset
211 # 2 bytes: flags
211 # 2 bytes: flags
212 # 4 bytes: compressed length
212 # 4 bytes: compressed length
213 # 4 bytes: uncompressed length
213 # 4 bytes: uncompressed length
214 # 4 bytes: base rev
214 # 4 bytes: base rev
215 # 4 bytes: link rev
215 # 4 bytes: link rev
216 # 4 bytes: parent 1 rev
216 # 4 bytes: parent 1 rev
217 # 4 bytes: parent 2 rev
217 # 4 bytes: parent 2 rev
218 # 32 bytes: nodeid
218 # 32 bytes: nodeid
219 indexformatng = struct.Struct(">Qiiiiii20s12x")
219 indexformatng = struct.Struct(">Qiiiiii20s12x")
220 indexformatng_pack = indexformatng.pack
220 indexformatng_pack = indexformatng.pack
221 versionformat = struct.Struct(">I")
221 versionformat = struct.Struct(">I")
222 versionformat_pack = versionformat.pack
222 versionformat_pack = versionformat.pack
223 versionformat_unpack = versionformat.unpack
223 versionformat_unpack = versionformat.unpack
224
224
225 # corresponds to the maximum uncompressed length representable in an
225 # corresponds to the maximum uncompressed length representable in an
226 # indexformatng entry (2 gigs, 4-byte signed integer)
226 # indexformatng entry (2 gigs, 4-byte signed integer)
227 _maxentrysize = 0x7fffffff
227 _maxentrysize = 0x7fffffff
228
228
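# Size check for the two on-disk entry layouts above: v0 stores three raw
# 20-byte nodeids per record, while the ng layout packs offset and flags
# into a single 8-byte field and pads the nodeid slot to 32 bytes:
import struct
assert struct.calcsize(">4l20s20s20s") == 76    # one indexformatv0 entry
assert struct.calcsize(">Qiiiiii20s12x") == 64  # one indexformatng entry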
229 class revlogio(object):
229 class revlogio(object):
230 def __init__(self):
230 def __init__(self):
231 self.size = indexformatng.size
231 self.size = indexformatng.size
232
232
233 def parseindex(self, data, inline):
233 def parseindex(self, data, inline):
234 # call the C implementation to parse the index data
234 # call the C implementation to parse the index data
235 index, cache = parsers.parse_index2(data, inline)
235 index, cache = parsers.parse_index2(data, inline)
236 return index, getattr(index, 'nodemap', None), cache
236 return index, getattr(index, 'nodemap', None), cache
237
237
238 def packentry(self, entry, node, version, rev):
238 def packentry(self, entry, node, version, rev):
239 p = indexformatng_pack(*entry)
239 p = indexformatng_pack(*entry)
240 if rev == 0:
240 if rev == 0:
241 p = versionformat_pack(version) + p[4:]
241 p = versionformat_pack(version) + p[4:]
242 return p
242 return p
243
243
244 class revlog(object):
244 class revlog(object):
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271 """
271 """
272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
273 """
273 """
274 create a revlog object
274 create a revlog object
275
275
276 opener is a function that abstracts the file opening operation
276 opener is a function that abstracts the file opening operation
277 and can be used to implement COW semantics or the like.
277 and can be used to implement COW semantics or the like.
278 """
278 """
279 self.indexfile = indexfile
279 self.indexfile = indexfile
280 self.datafile = datafile or (indexfile[:-2] + ".d")
280 self.datafile = datafile or (indexfile[:-2] + ".d")
281 self.opener = opener
281 self.opener = opener
282 # When True, indexfile is opened with checkambig=True at writing, to
282 # When True, indexfile is opened with checkambig=True at writing, to
283 # avoid file stat ambiguity.
283 # avoid file stat ambiguity.
284 self._checkambig = checkambig
284 self._checkambig = checkambig
285 # 3-tuple of (node, rev, text) for a raw revision.
285 # 3-tuple of (node, rev, text) for a raw revision.
286 self._cache = None
286 self._cache = None
287 # Maps rev to chain base rev.
287 # Maps rev to chain base rev.
288 self._chainbasecache = util.lrucachedict(100)
288 self._chainbasecache = util.lrucachedict(100)
289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
290 self._chunkcache = (0, '')
290 self._chunkcache = (0, '')
291 # How much data to read and cache into the raw revlog data cache.
291 # How much data to read and cache into the raw revlog data cache.
292 self._chunkcachesize = 65536
292 self._chunkcachesize = 65536
293 self._maxchainlen = None
293 self._maxchainlen = None
294 self._aggressivemergedeltas = False
294 self._aggressivemergedeltas = False
295 self.index = []
295 self.index = []
296 # Mapping of partial identifiers to full nodes.
296 # Mapping of partial identifiers to full nodes.
297 self._pcache = {}
297 self._pcache = {}
298 # Mapping of revision integer to full node.
298 # Mapping of revision integer to full node.
299 self._nodecache = {nullid: nullrev}
299 self._nodecache = {nullid: nullrev}
300 self._nodepos = None
300 self._nodepos = None
301 self._compengine = 'zlib'
301 self._compengine = 'zlib'
302 self._maxdeltachainspan = -1
302 self._maxdeltachainspan = -1
303
303
304 v = REVLOG_DEFAULT_VERSION
304 v = REVLOG_DEFAULT_VERSION
305 opts = getattr(opener, 'options', None)
305 opts = getattr(opener, 'options', None)
306 if opts is not None:
306 if opts is not None:
307 if 'revlogv2' in opts:
307 if 'revlogv2' in opts:
308 # version 2 revlogs always use generaldelta.
308 # version 2 revlogs always use generaldelta.
309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
310 elif 'revlogv1' in opts:
310 elif 'revlogv1' in opts:
311 if 'generaldelta' in opts:
311 if 'generaldelta' in opts:
312 v |= FLAG_GENERALDELTA
312 v |= FLAG_GENERALDELTA
313 else:
313 else:
314 v = 0
314 v = 0
315 if 'chunkcachesize' in opts:
315 if 'chunkcachesize' in opts:
316 self._chunkcachesize = opts['chunkcachesize']
316 self._chunkcachesize = opts['chunkcachesize']
317 if 'maxchainlen' in opts:
317 if 'maxchainlen' in opts:
318 self._maxchainlen = opts['maxchainlen']
318 self._maxchainlen = opts['maxchainlen']
319 if 'aggressivemergedeltas' in opts:
319 if 'aggressivemergedeltas' in opts:
320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
322 if 'compengine' in opts:
322 if 'compengine' in opts:
323 self._compengine = opts['compengine']
323 self._compengine = opts['compengine']
324 if 'maxdeltachainspan' in opts:
324 if 'maxdeltachainspan' in opts:
325 self._maxdeltachainspan = opts['maxdeltachainspan']
325 self._maxdeltachainspan = opts['maxdeltachainspan']
326
326
327 if self._chunkcachesize <= 0:
327 if self._chunkcachesize <= 0:
328 raise RevlogError(_('revlog chunk cache size %r is not greater '
328 raise RevlogError(_('revlog chunk cache size %r is not greater '
329 'than 0') % self._chunkcachesize)
329 'than 0') % self._chunkcachesize)
330 elif self._chunkcachesize & (self._chunkcachesize - 1):
330 elif self._chunkcachesize & (self._chunkcachesize - 1):
331 raise RevlogError(_('revlog chunk cache size %r is not a power '
331 raise RevlogError(_('revlog chunk cache size %r is not a power '
332 'of 2') % self._chunkcachesize)
332 'of 2') % self._chunkcachesize)
333
333
334 indexdata = ''
334 indexdata = ''
335 self._initempty = True
335 self._initempty = True
336 try:
336 try:
337 f = self.opener(self.indexfile)
337 f = self.opener(self.indexfile)
338 indexdata = f.read()
338 indexdata = f.read()
339 f.close()
339 f.close()
340 if len(indexdata) > 0:
340 if len(indexdata) > 0:
341 v = versionformat_unpack(indexdata[:4])[0]
341 v = versionformat_unpack(indexdata[:4])[0]
342 self._initempty = False
342 self._initempty = False
343 except IOError as inst:
343 except IOError as inst:
344 if inst.errno != errno.ENOENT:
344 if inst.errno != errno.ENOENT:
345 raise
345 raise
346
346
347 self.version = v
347 self.version = v
348 self._inline = v & FLAG_INLINE_DATA
348 self._inline = v & FLAG_INLINE_DATA
349 self._generaldelta = v & FLAG_GENERALDELTA
349 self._generaldelta = v & FLAG_GENERALDELTA
350 flags = v & ~0xFFFF
350 flags = v & ~0xFFFF
351 fmt = v & 0xFFFF
351 fmt = v & 0xFFFF
352 if fmt == REVLOGV0:
352 if fmt == REVLOGV0:
353 if flags:
353 if flags:
354 raise RevlogError(_('unknown flags (%#04x) in version %d '
354 raise RevlogError(_('unknown flags (%#04x) in version %d '
355 'revlog %s') %
355 'revlog %s') %
356 (flags >> 16, fmt, self.indexfile))
356 (flags >> 16, fmt, self.indexfile))
357 elif fmt == REVLOGV1:
357 elif fmt == REVLOGV1:
358 if flags & ~REVLOGV1_FLAGS:
358 if flags & ~REVLOGV1_FLAGS:
359 raise RevlogError(_('unknown flags (%#04x) in version %d '
359 raise RevlogError(_('unknown flags (%#04x) in version %d '
360 'revlog %s') %
360 'revlog %s') %
361 (flags >> 16, fmt, self.indexfile))
361 (flags >> 16, fmt, self.indexfile))
362 elif fmt == REVLOGV2:
362 elif fmt == REVLOGV2:
363 if flags & ~REVLOGV2_FLAGS:
363 if flags & ~REVLOGV2_FLAGS:
364 raise RevlogError(_('unknown flags (%#04x) in version %d '
364 raise RevlogError(_('unknown flags (%#04x) in version %d '
365 'revlog %s') %
365 'revlog %s') %
366 (flags >> 16, fmt, self.indexfile))
366 (flags >> 16, fmt, self.indexfile))
367 else:
367 else:
368 raise RevlogError(_('unknown version (%d) in revlog %s') %
368 raise RevlogError(_('unknown version (%d) in revlog %s') %
369 (fmt, self.indexfile))
369 (fmt, self.indexfile))
370
370
371 self.storedeltachains = True
371 self.storedeltachains = True
372
372
373 self._io = revlogio()
373 self._io = revlogio()
374 if self.version == REVLOGV0:
374 if self.version == REVLOGV0:
375 self._io = revlogoldio()
375 self._io = revlogoldio()
376 try:
376 try:
377 d = self._io.parseindex(indexdata, self._inline)
377 d = self._io.parseindex(indexdata, self._inline)
378 except (ValueError, IndexError):
378 except (ValueError, IndexError):
379 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
379 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
380 self.index, nodemap, self._chunkcache = d
380 self.index, nodemap, self._chunkcache = d
381 if nodemap is not None:
381 if nodemap is not None:
382 self.nodemap = self._nodecache = nodemap
382 self.nodemap = self._nodecache = nodemap
383 if not self._chunkcache:
383 if not self._chunkcache:
384 self._chunkclear()
384 self._chunkclear()
385 # revnum -> (chain-length, sum-delta-length)
385 # revnum -> (chain-length, sum-delta-length)
386 self._chaininfocache = {}
386 self._chaininfocache = {}
387 # revlog header -> revlog compressor
387 # revlog header -> revlog compressor
388 self._decompressors = {}
388 self._decompressors = {}
389
389
390 @util.propertycache
390 @util.propertycache
391 def _compressor(self):
391 def _compressor(self):
392 return util.compengines[self._compengine].revlogcompressor()
392 return util.compengines[self._compengine].revlogcompressor()
393
393
394 def tip(self):
394 def tip(self):
395 return self.node(len(self.index) - 2)
395 return self.node(len(self.index) - 2)
396 def __contains__(self, rev):
396 def __contains__(self, rev):
397 return 0 <= rev < len(self)
397 return 0 <= rev < len(self)
398 def __len__(self):
398 def __len__(self):
399 return len(self.index) - 1
399 return len(self.index) - 1
400 def __iter__(self):
400 def __iter__(self):
401 return iter(xrange(len(self)))
401 return iter(xrange(len(self)))
402 def revs(self, start=0, stop=None):
402 def revs(self, start=0, stop=None):
403 """iterate over all rev in this revlog (from start to stop)"""
403 """iterate over all rev in this revlog (from start to stop)"""
404 step = 1
404 step = 1
405 if stop is not None:
405 if stop is not None:
406 if start > stop:
406 if start > stop:
407 step = -1
407 step = -1
408 stop += step
408 stop += step
409 else:
409 else:
410 stop = len(self)
410 stop = len(self)
411 return xrange(start, stop, step)
411 return xrange(start, stop, step)
412
412
413 @util.propertycache
413 @util.propertycache
414 def nodemap(self):
414 def nodemap(self):
415 self.rev(self.node(0))
415 self.rev(self.node(0))
416 return self._nodecache
416 return self._nodecache
417
417
418 def hasnode(self, node):
418 def hasnode(self, node):
419 try:
419 try:
420 self.rev(node)
420 self.rev(node)
421 return True
421 return True
422 except KeyError:
422 except KeyError:
423 return False
423 return False
424
424
425 def clearcaches(self):
425 def clearcaches(self):
426 self._cache = None
426 self._cache = None
427 self._chainbasecache.clear()
427 self._chainbasecache.clear()
428 self._chunkcache = (0, '')
428 self._chunkcache = (0, '')
429 self._pcache = {}
429 self._pcache = {}
430
430
431 try:
431 try:
432 self._nodecache.clearcaches()
432 self._nodecache.clearcaches()
433 except AttributeError:
433 except AttributeError:
434 self._nodecache = {nullid: nullrev}
434 self._nodecache = {nullid: nullrev}
435 self._nodepos = None
435 self._nodepos = None
436
436
437 def rev(self, node):
437 def rev(self, node):
438 try:
438 try:
439 return self._nodecache[node]
439 return self._nodecache[node]
440 except TypeError:
440 except TypeError:
441 raise
441 raise
442 except RevlogError:
442 except RevlogError:
443 # parsers.c radix tree lookup failed
443 # parsers.c radix tree lookup failed
444 if node == wdirid:
444 if node == wdirid:
445 raise error.WdirUnsupported
445 raise error.WdirUnsupported
446 raise LookupError(node, self.indexfile, _('no node'))
446 raise LookupError(node, self.indexfile, _('no node'))
447 except KeyError:
447 except KeyError:
448 # pure python cache lookup failed
448 # pure python cache lookup failed
449 n = self._nodecache
449 n = self._nodecache
450 i = self.index
450 i = self.index
451 p = self._nodepos
451 p = self._nodepos
452 if p is None:
452 if p is None:
453 p = len(i) - 2
453 p = len(i) - 2
454 for r in xrange(p, -1, -1):
454 for r in xrange(p, -1, -1):
455 v = i[r][7]
455 v = i[r][7]
456 n[v] = r
456 n[v] = r
457 if v == node:
457 if v == node:
458 self._nodepos = r - 1
458 self._nodepos = r - 1
459 return r
459 return r
460 if node == wdirid:
460 if node == wdirid:
461 raise error.WdirUnsupported
461 raise error.WdirUnsupported
462 raise LookupError(node, self.indexfile, _('no node'))
462 raise LookupError(node, self.indexfile, _('no node'))
463
463
464 # Accessors for index entries.
464 # Accessors for index entries.
465
465
466 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
466 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
467 # are flags.
467 # are flags.
468 def start(self, rev):
468 def start(self, rev):
469 return int(self.index[rev][0] >> 16)
469 return int(self.index[rev][0] >> 16)
470
470
471 def flags(self, rev):
471 def flags(self, rev):
472 return self.index[rev][0] & 0xFFFF
472 return self.index[rev][0] & 0xFFFF
473
473
474 def length(self, rev):
474 def length(self, rev):
475 return self.index[rev][1]
475 return self.index[rev][1]
476
476
477 def rawsize(self, rev):
477 def rawsize(self, rev):
478 """return the length of the uncompressed text for a given revision"""
478 """return the length of the uncompressed text for a given revision"""
479 l = self.index[rev][2]
479 l = self.index[rev][2]
480 if l >= 0:
480 if l >= 0:
481 return l
481 return l
482
482
483 t = self.revision(rev, raw=True)
483 t = self.revision(rev, raw=True)
484 return len(t)
484 return len(t)
485
485
486 def size(self, rev):
486 def size(self, rev):
487 """length of non-raw text (processed by a "read" flag processor)"""
487 """length of non-raw text (processed by a "read" flag processor)"""
488 # fast path: if no "read" flag processor could change the content,
488 # fast path: if no "read" flag processor could change the content,
489 # size is rawsize. note: ELLIPSIS is known to not change the content.
489 # size is rawsize. note: ELLIPSIS is known to not change the content.
490 flags = self.flags(rev)
490 flags = self.flags(rev)
491 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
491 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
492 return self.rawsize(rev)
492 return self.rawsize(rev)
493
493
494 return len(self.revision(rev, raw=False))
494 return len(self.revision(rev, raw=False))
495
495
496 def chainbase(self, rev):
496 def chainbase(self, rev):
497 base = self._chainbasecache.get(rev)
497 base = self._chainbasecache.get(rev)
498 if base is not None:
498 if base is not None:
499 return base
499 return base
500
500
501 index = self.index
501 index = self.index
502 base = index[rev][3]
502 base = index[rev][3]
503 while base != rev:
503 while base != rev:
504 rev = base
504 rev = base
505 base = index[rev][3]
505 base = index[rev][3]
506
506
507 self._chainbasecache[rev] = base
507 self._chainbasecache[rev] = base
508 return base
508 return base
509
509
510 def linkrev(self, rev):
510 def linkrev(self, rev):
511 return self.index[rev][4]
511 return self.index[rev][4]
512
512
513 def parentrevs(self, rev):
513 def parentrevs(self, rev):
514 try:
514 try:
515 return self.index[rev][5:7]
515 return self.index[rev][5:7]
516 except IndexError:
516 except IndexError:
517 if rev == wdirrev:
517 if rev == wdirrev:
518 raise error.WdirUnsupported
518 raise error.WdirUnsupported
519 raise
519 raise
520
520
521 def node(self, rev):
521 def node(self, rev):
522 try:
522 try:
523 return self.index[rev][7]
523 return self.index[rev][7]
524 except IndexError:
524 except IndexError:
525 if rev == wdirrev:
525 if rev == wdirrev:
526 raise error.WdirUnsupported
526 raise error.WdirUnsupported
527 raise
527 raise
528
528
529 # Derived from index values.
529 # Derived from index values.
530
530
531 def end(self, rev):
531 def end(self, rev):
532 return self.start(rev) + self.length(rev)
532 return self.start(rev) + self.length(rev)
533
533
534 def parents(self, node):
534 def parents(self, node):
535 i = self.index
535 i = self.index
536 d = i[self.rev(node)]
536 d = i[self.rev(node)]
537 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
537 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
538
538
539 def chainlen(self, rev):
539 def chainlen(self, rev):
540 return self._chaininfo(rev)[0]
540 return self._chaininfo(rev)[0]
541
541
542 def _chaininfo(self, rev):
542 def _chaininfo(self, rev):
543 chaininfocache = self._chaininfocache
543 chaininfocache = self._chaininfocache
544 if rev in chaininfocache:
544 if rev in chaininfocache:
545 return chaininfocache[rev]
545 return chaininfocache[rev]
546 index = self.index
546 index = self.index
547 generaldelta = self._generaldelta
547 generaldelta = self._generaldelta
548 iterrev = rev
548 iterrev = rev
549 e = index[iterrev]
549 e = index[iterrev]
550 clen = 0
550 clen = 0
551 compresseddeltalen = 0
551 compresseddeltalen = 0
552 while iterrev != e[3]:
552 while iterrev != e[3]:
553 clen += 1
553 clen += 1
554 compresseddeltalen += e[1]
554 compresseddeltalen += e[1]
555 if generaldelta:
555 if generaldelta:
556 iterrev = e[3]
556 iterrev = e[3]
557 else:
557 else:
558 iterrev -= 1
558 iterrev -= 1
559 if iterrev in chaininfocache:
559 if iterrev in chaininfocache:
560 t = chaininfocache[iterrev]
560 t = chaininfocache[iterrev]
561 clen += t[0]
561 clen += t[0]
562 compresseddeltalen += t[1]
562 compresseddeltalen += t[1]
563 break
563 break
564 e = index[iterrev]
564 e = index[iterrev]
565 else:
565 else:
566 # Add text length of base since decompressing that also takes
566 # Add text length of base since decompressing that also takes
567 # work. For cache hits the length is already included.
567 # work. For cache hits the length is already included.
568 compresseddeltalen += e[1]
568 compresseddeltalen += e[1]
569 r = (clen, compresseddeltalen)
569 r = (clen, compresseddeltalen)
570 chaininfocache[rev] = r
570 chaininfocache[rev] = r
571 return r
571 return r
572
572
573 def _deltachain(self, rev, stoprev=None):
573 def _deltachain(self, rev, stoprev=None):
574 """Obtain the delta chain for a revision.
574 """Obtain the delta chain for a revision.
575
575
576 ``stoprev`` specifies a revision to stop at. If not specified, we
576 ``stoprev`` specifies a revision to stop at. If not specified, we
577 stop at the base of the chain.
577 stop at the base of the chain.
578
578
579 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
579 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
580 revs in ascending order and ``stopped`` is a bool indicating whether
580 revs in ascending order and ``stopped`` is a bool indicating whether
581 ``stoprev`` was hit.
581 ``stoprev`` was hit.
582 """
582 """
583 # Try C implementation.
583 # Try C implementation.
584 try:
584 try:
585 return self.index.deltachain(rev, stoprev, self._generaldelta)
585 return self.index.deltachain(rev, stoprev, self._generaldelta)
586 except AttributeError:
586 except AttributeError:
587 pass
587 pass
588
588
589 chain = []
589 chain = []
590
590
591 # Alias to prevent attribute lookup in tight loop.
591 # Alias to prevent attribute lookup in tight loop.
592 index = self.index
592 index = self.index
593 generaldelta = self._generaldelta
593 generaldelta = self._generaldelta
594
594
595 iterrev = rev
595 iterrev = rev
596 e = index[iterrev]
596 e = index[iterrev]
597 while iterrev != e[3] and iterrev != stoprev:
597 while iterrev != e[3] and iterrev != stoprev:
598 chain.append(iterrev)
598 chain.append(iterrev)
599 if generaldelta:
599 if generaldelta:
600 iterrev = e[3]
600 iterrev = e[3]
601 else:
601 else:
602 iterrev -= 1
602 iterrev -= 1
603 e = index[iterrev]
603 e = index[iterrev]
604
604
605 if iterrev == stoprev:
605 if iterrev == stoprev:
606 stopped = True
606 stopped = True
607 else:
607 else:
608 chain.append(iterrev)
608 chain.append(iterrev)
609 stopped = False
609 stopped = False
610
610
611 chain.reverse()
611 chain.reverse()
612 return chain, stopped
612 return chain, stopped
613
613
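# Toy illustration of the pure-Python chain walk above for a generaldelta
# index whose entries keep their delta-base rev in slot 3 (made-up data):
bases = [0, 0, 1, 1]     # rev -> delta-base rev; rev 0 is a full snapshot
chain, iterrev = [], 3
while iterrev != bases[iterrev]:
    chain.append(iterrev)
    iterrev = bases[iterrev]
chain.append(iterrev)    # not stopped early, so include the chain base
chain.reverse()
assert chain == [0, 1, 3]  # rev 3 = snapshot(0) + delta(1) + delta(3)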
614 def ancestors(self, revs, stoprev=0, inclusive=False):
614 def ancestors(self, revs, stoprev=0, inclusive=False):
615 """Generate the ancestors of 'revs' in reverse topological order.
615 """Generate the ancestors of 'revs' in reverse topological order.
616 Does not generate revs lower than stoprev.
616 Does not generate revs lower than stoprev.
617
617
618 See the documentation for ancestor.lazyancestors for more details."""
618 See the documentation for ancestor.lazyancestors for more details."""
619
619
620 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
620 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
621 inclusive=inclusive)
621 inclusive=inclusive)
622
622
623 def descendants(self, revs):
623 def descendants(self, revs):
624 """Generate the descendants of 'revs' in revision order.
624 """Generate the descendants of 'revs' in revision order.
625
625
626 Yield a sequence of revision numbers starting with a child of
626 Yield a sequence of revision numbers starting with a child of
627 some rev in revs, i.e., each revision is *not* considered a
627 some rev in revs, i.e., each revision is *not* considered a
628 descendant of itself. Results are ordered by revision number (a
628 descendant of itself. Results are ordered by revision number (a
629 topological sort)."""
629 topological sort)."""
630 first = min(revs)
630 first = min(revs)
631 if first == nullrev:
631 if first == nullrev:
632 for i in self:
632 for i in self:
633 yield i
633 yield i
634 return
634 return
635
635
636 seen = set(revs)
636 seen = set(revs)
637 for i in self.revs(start=first + 1):
637 for i in self.revs(start=first + 1):
638 for x in self.parentrevs(i):
638 for x in self.parentrevs(i):
639 if x != nullrev and x in seen:
639 if x != nullrev and x in seen:
640 seen.add(i)
640 seen.add(i)
641 yield i
641 yield i
642 break
642 break
643
643
644 def findcommonmissing(self, common=None, heads=None):
644 def findcommonmissing(self, common=None, heads=None):
645 """Return a tuple of the ancestors of common and the ancestors of heads
645 """Return a tuple of the ancestors of common and the ancestors of heads
646 that are not ancestors of common. In revset terminology, we return the
646 that are not ancestors of common. In revset terminology, we return the
647 tuple:
647 tuple:
648
648
649 ::common, (::heads) - (::common)
649 ::common, (::heads) - (::common)
650
650
651 The list is sorted by revision number, meaning it is
651 The list is sorted by revision number, meaning it is
652 topologically sorted.
652 topologically sorted.
653
653
654 'heads' and 'common' are both lists of node IDs. If heads is
654 'heads' and 'common' are both lists of node IDs. If heads is
655 not supplied, uses all of the revlog's heads. If common is not
655 not supplied, uses all of the revlog's heads. If common is not
656 supplied, uses nullid."""
656 supplied, uses nullid."""
657 if common is None:
657 if common is None:
658 common = [nullid]
658 common = [nullid]
659 if heads is None:
659 if heads is None:
660 heads = self.heads()
660 heads = self.heads()
661
661
662 common = [self.rev(n) for n in common]
662 common = [self.rev(n) for n in common]
663 heads = [self.rev(n) for n in heads]
663 heads = [self.rev(n) for n in heads]
664
664
665 # we want the ancestors, but inclusive
665 # we want the ancestors, but inclusive
666 class lazyset(object):
666 class lazyset(object):
667 def __init__(self, lazyvalues):
667 def __init__(self, lazyvalues):
668 self.addedvalues = set()
668 self.addedvalues = set()
669 self.lazyvalues = lazyvalues
669 self.lazyvalues = lazyvalues
670
670
671 def __contains__(self, value):
671 def __contains__(self, value):
672 return value in self.addedvalues or value in self.lazyvalues
672 return value in self.addedvalues or value in self.lazyvalues
673
673
674 def __iter__(self):
674 def __iter__(self):
675 added = self.addedvalues
675 added = self.addedvalues
676 for r in added:
676 for r in added:
677 yield r
677 yield r
678 for r in self.lazyvalues:
678 for r in self.lazyvalues:
679 if r not in added:
679 if r not in added:
680 yield r
680 yield r
681
681
682 def add(self, value):
682 def add(self, value):
683 self.addedvalues.add(value)
683 self.addedvalues.add(value)
684
684
685 def update(self, values):
685 def update(self, values):
686 self.addedvalues.update(values)
686 self.addedvalues.update(values)
687
687
688 has = lazyset(self.ancestors(common))
688 has = lazyset(self.ancestors(common))
689 has.add(nullrev)
689 has.add(nullrev)
690 has.update(common)
690 has.update(common)
691
691
692 # take all ancestors from heads that aren't in has
692 # take all ancestors from heads that aren't in has
693 missing = set()
693 missing = set()
694 visit = collections.deque(r for r in heads if r not in has)
694 visit = collections.deque(r for r in heads if r not in has)
695 while visit:
695 while visit:
696 r = visit.popleft()
696 r = visit.popleft()
697 if r in missing:
697 if r in missing:
698 continue
698 continue
699 else:
699 else:
700 missing.add(r)
700 missing.add(r)
701 for p in self.parentrevs(r):
701 for p in self.parentrevs(r):
702 if p not in has:
702 if p not in has:
703 visit.append(p)
703 visit.append(p)
704 missing = list(missing)
704 missing = list(missing)
705 missing.sort()
705 missing.sort()
706 return has, [self.node(miss) for miss in missing]
706 return has, [self.node(miss) for miss in missing]
707
707
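# Toy check of the set algebra above on a linear DAG 0 <- 1 <- 2 with
# common = [0] and heads = [2]; ancestors are taken inclusively, mirroring
# the lazyset bookkeeping (illustration only, plain dicts for the graph):
parents = {0: [], 1: [0], 2: [1]}
def _ancestors(r):
    seen, stack = set(), [r]
    while stack:
        n = stack.pop()
        if n not in seen:
            seen.add(n)
            stack.extend(parents[n])
    return seen
assert _ancestors(2) - _ancestors(0) == {1, 2}   # (::heads) - (::common)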
708 def incrementalmissingrevs(self, common=None):
708 def incrementalmissingrevs(self, common=None):
709 """Return an object that can be used to incrementally compute the
709 """Return an object that can be used to incrementally compute the
710 revision numbers of the ancestors of arbitrary sets that are not
710 revision numbers of the ancestors of arbitrary sets that are not
711 ancestors of common. This is an ancestor.incrementalmissingancestors
711 ancestors of common. This is an ancestor.incrementalmissingancestors
712 object.
712 object.
713
713
714 'common' is a list of revision numbers. If common is not supplied, uses
714 'common' is a list of revision numbers. If common is not supplied, uses
715 nullrev.
715 nullrev.
716 """
716 """
717 if common is None:
717 if common is None:
718 common = [nullrev]
718 common = [nullrev]
719
719
720 return ancestor.incrementalmissingancestors(self.parentrevs, common)
720 return ancestor.incrementalmissingancestors(self.parentrevs, common)
721
721
722 def findmissingrevs(self, common=None, heads=None):
722 def findmissingrevs(self, common=None, heads=None):
723 """Return the revision numbers of the ancestors of heads that
723 """Return the revision numbers of the ancestors of heads that
724 are not ancestors of common.
724 are not ancestors of common.
725
725
726 More specifically, return a list of revision numbers corresponding to
726 More specifically, return a list of revision numbers corresponding to
727 nodes N such that every N satisfies the following constraints:
727 nodes N such that every N satisfies the following constraints:
728
728
729 1. N is an ancestor of some node in 'heads'
729 1. N is an ancestor of some node in 'heads'
730 2. N is not an ancestor of any node in 'common'
730 2. N is not an ancestor of any node in 'common'
731
731
732 The list is sorted by revision number, meaning it is
732 The list is sorted by revision number, meaning it is
733 topologically sorted.
733 topologically sorted.
734
734
735 'heads' and 'common' are both lists of revision numbers. If heads is
735 'heads' and 'common' are both lists of revision numbers. If heads is
736 not supplied, uses all of the revlog's heads. If common is not
736 not supplied, uses all of the revlog's heads. If common is not
737 supplied, uses nullrev."""
737 supplied, uses nullrev."""
738 if common is None:
738 if common is None:
739 common = [nullrev]
739 common = [nullrev]
740 if heads is None:
740 if heads is None:
741 heads = self.headrevs()
741 heads = self.headrevs()
742
742
743 inc = self.incrementalmissingrevs(common=common)
743 inc = self.incrementalmissingrevs(common=common)
744 return inc.missingancestors(heads)
744 return inc.missingancestors(heads)
745
745
746 def findmissing(self, common=None, heads=None):
746 def findmissing(self, common=None, heads=None):
747 """Return the ancestors of heads that are not ancestors of common.
747 """Return the ancestors of heads that are not ancestors of common.
748
748
749 More specifically, return a list of nodes N such that every N
749 More specifically, return a list of nodes N such that every N
750 satisfies the following constraints:
750 satisfies the following constraints:
751
751
752 1. N is an ancestor of some node in 'heads'
752 1. N is an ancestor of some node in 'heads'
753 2. N is not an ancestor of any node in 'common'
753 2. N is not an ancestor of any node in 'common'
754
754
755 The list is sorted by revision number, meaning it is
755 The list is sorted by revision number, meaning it is
756 topologically sorted.
756 topologically sorted.
757
757
758 'heads' and 'common' are both lists of node IDs. If heads is
758 'heads' and 'common' are both lists of node IDs. If heads is
759 not supplied, uses all of the revlog's heads. If common is not
759 not supplied, uses all of the revlog's heads. If common is not
760 supplied, uses nullid."""
760 supplied, uses nullid."""
761 if common is None:
761 if common is None:
762 common = [nullid]
762 common = [nullid]
763 if heads is None:
763 if heads is None:
764 heads = self.heads()
764 heads = self.heads()
765
765
766 common = [self.rev(n) for n in common]
766 common = [self.rev(n) for n in common]
767 heads = [self.rev(n) for n in heads]
767 heads = [self.rev(n) for n in heads]
768
768
769 inc = self.incrementalmissingrevs(common=common)
769 inc = self.incrementalmissingrevs(common=common)
770 return [self.node(r) for r in inc.missingancestors(heads)]
770 return [self.node(r) for r in inc.missingancestors(heads)]
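
    # Sketch of how the two variants relate (hedged; 'rlog' and the
    # nodes 'n1'/'n2' are illustrative):
    #
    #   revs = rlog.findmissingrevs(common=[rlog.rev(n1)],
    #                               heads=[rlog.rev(n2)])
    #   nodes = rlog.findmissing(common=[n1], heads=[n2])
    #   # nodes == [rlog.node(r) for r in revs]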

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
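
    # Minimal usage sketch (hedged; 'rlog' and the nodes are
    # illustrative):
    #
    #   nodes, outroots, outheads = rlog.nodesbetween([rootnode],
    #                                                 [headnode])
    #   # 'nodes' walks topologically from rootnode up to headnode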

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
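
    # Hedged worked example of the loop above: for a three-rev log where
    # rev 2 has parents (0, 1), each rev first marks itself as a head and
    # then clears its parents, so ishead ends as [0, 0, 1, 0] and only
    # rev 2 is returned. (The extra trailing slot absorbs nullrev = -1.)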

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
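
    # Minimal usage sketch (hedged; 'rlog' and 'somenode' are
    # illustrative):
    #
    #   rlog.heads()                 # all childless nodes
    #   rlog.heads(start=somenode)   # heads descending from somenode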

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
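
    # Hedged sketch: in a criss-cross merge where nodes a and b each
    # descend from two shared ancestors x and y, commonancestorsheads()
    # returns [x, y], isancestor(a, b) is False, and ancestor() breaks
    # the tie deterministically via min() over the candidate nodes.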

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
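
    # Resolution-order sketch (a summary of the code above, not new
    # behavior; the identifiers are illustrative): lookup('10') resolves
    # as rev 10 through _match, while lookup('1de9') falls through to
    # _partialmatch and resolves the unique hex prefix, raising
    # LookupError if the prefix is ambiguous.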

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # a pure-int prefix is only usable if it cannot be
                    # mistaken for a revision number: either it starts
                    # with a zero, or it exceeds the number of revisions
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest
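
    # Hedged sketch of the search: starting from a 6-digit probe, the
    # loop shrinks the prefix while it stays unambiguous and grows it
    # while it is ambiguous, so shortest('1de9ab...') may settle on
    # '1de9' once no other node shares that prefix.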

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
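
    # Worked example of the cache-window arithmetic above (hedged; the
    # numbers are illustrative and assume the cache size is a power of
    # two). With cachesize = 65536, a request for offset=70000,
    # length=100 widens to:
    #
    #   realoffset = 70000 & ~65535                        -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535)
    #                - 65536                               -> 65536
    #
    # so one aligned 64k window is read and cached, and the requested
    # 100 bytes come back as a buffer into it.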

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
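
    # Hedged note on the inline adjustment above: in an inline revlog the
    # index entries and data chunks are interleaved in a single file, so
    # the data for rev r sits after r + 1 index entries of self._io.size
    # bytes each; the offsets are shifted accordingly before reading.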

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
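
    # Hedged sketch of the two delta layouts: without generaldelta the
    # delta base is implicitly the previous revision, so deltaparent(5)
    # is 4 unless rev 5 is itself a full snapshot (base == rev), which
    # yields nullrev; with generaldelta the base is stored explicitly in
    # index[5][3] and may be any earlier revision, typically a parent.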

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
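
    # Minimal usage sketch (hedged; 'rlog' and the rev number are
    # illustrative):
    #
    #   text = rlog.revision(7)           # flag-processed text of rev 7
    #   raw = rlog.revision(7, raw=True)  # stored rawtext, no transforms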

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation")
                                         % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
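
    # Hedged sketch of a flag-processor triple as consumed above; the
    # flag value and helpers are hypothetical, and registration is shown
    # as a direct dict write for brevity:
    #
    #   def _myread(rl, rawtext):
    #       return mydecode(rawtext), True    # (text, validatehash)
    #   def _mywrite(rl, text):
    #       return myencode(text), False
    #   def _myraw(rl, rawtext):
    #       return False                      # rawtext hash not checkable
    #
    #   _flagprocessors[REVIDX_MYFLAG] = (_myread, _mywrite, _myraw)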

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)
1609
1609
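Editor's note: a minimal usage sketch for addrevision() on a standalone revlog, borrowing the vfs/transaction setup from the test further below. The scratch path and names here are hypothetical and the directory is assumed to exist; this is not part of the diff.

    from mercurial import node, revlog, transaction, vfs

    myvfs = vfs.vfs(b'/tmp/revlog-demo')  # hypothetical scratch dir; must exist
    myvfs.options = {'revlogv1': True, 'generaldelta': True}
    tr = transaction.transaction(lambda msg: None, myvfs,
                                 {'plain': myvfs}, b'journal')
    rlog = revlog.revlog(myvfs, b'_demo.i')
    # link=0, parents are null; the node is computed as hash(text, p1, p2)
    n = rlog.addrevision(b'hello\n', tr, 0, node.nullid, node.nullid)
    tr.close()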
1610 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1610 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1611 cachedelta=None):
1611 cachedelta=None):
1612 """add a raw revision with known flags, node and parents
1612 """add a raw revision with known flags, node and parents
1613 useful when reusing a revision not stored in this revlog (e.g. received
1613 useful when reusing a revision not stored in this revlog (e.g. received
1614 over the wire, or read from an external bundle).
1614 over the wire, or read from an external bundle).
1615 """
1615 """
1616 dfh = None
1616 dfh = None
1617 if not self._inline:
1617 if not self._inline:
1618 dfh = self.opener(self.datafile, "a+")
1618 dfh = self.opener(self.datafile, "a+")
1619 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1619 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1620 try:
1620 try:
1621 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1621 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1622 flags, cachedelta, ifh, dfh)
1622 flags, cachedelta, ifh, dfh)
1623 finally:
1623 finally:
1624 if dfh:
1624 if dfh:
1625 dfh.close()
1625 dfh.close()
1626 ifh.close()
1626 ifh.close()
1627
1627
1628 def compress(self, data):
1628 def compress(self, data):
1629 """Generate a possibly-compressed representation of data."""
1629 """Generate a possibly-compressed representation of data."""
1630 if not data:
1630 if not data:
1631 return '', data
1631 return '', data
1632
1632
1633 compressed = self._compressor.compress(data)
1633 compressed = self._compressor.compress(data)
1634
1634
1635 if compressed:
1635 if compressed:
1636 # The revlog compressor added the header in the returned data.
1636 # The revlog compressor added the header in the returned data.
1637 return '', compressed
1637 return '', compressed
1638
1638
1639 if data[0:1] == '\0':
1639 if data[0:1] == '\0':
1640 return '', data
1640 return '', data
1641 return 'u', data
1641 return 'u', data
1642
1642
1643 def decompress(self, data):
1643 def decompress(self, data):
1644 """Decompress a revlog chunk.
1644 """Decompress a revlog chunk.
1645
1645
1646 The chunk is expected to begin with a header identifying the
1646 The chunk is expected to begin with a header identifying the
1647 format type so it can be routed to an appropriate decompressor.
1647 format type so it can be routed to an appropriate decompressor.
1648 """
1648 """
1649 if not data:
1649 if not data:
1650 return data
1650 return data
1651
1651
1652 # Revlogs are read much more frequently than they are written and many
1652 # Revlogs are read much more frequently than they are written and many
1653 # chunks only take microseconds to decompress, so performance is
1653 # chunks only take microseconds to decompress, so performance is
1654 # important here.
1654 # important here.
1655 #
1655 #
1656 # We can make a few assumptions about revlogs:
1656 # We can make a few assumptions about revlogs:
1657 #
1657 #
1658 # 1) the majority of chunks will be compressed (as opposed to inline
1658 # 1) the majority of chunks will be compressed (as opposed to inline
1659 # raw data).
1659 # raw data).
1660 # 2) decompressing *any* data will likely be at least 10x slower than
1660 # 2) decompressing *any* data will likely be at least 10x slower than
1661 # returning raw inline data.
1661 # returning raw inline data.
1662 # 3) we want to prioritize common and officially supported compression
1662 # 3) we want to prioritize common and officially supported compression
1663 # engines
1663 # engines
1664 #
1664 #
1665 # It follows that we want to optimize for "decompress compressed data
1665 # It follows that we want to optimize for "decompress compressed data
1666 # when encoded with common and officially supported compression engines"
1666 # when encoded with common and officially supported compression engines"
1667 # case over "raw data" and "data encoded by less common or non-official
1667 # case over "raw data" and "data encoded by less common or non-official
1668 # compression engines." That is why we have the inline lookup first
1668 # compression engines." That is why we have the inline lookup first
1669 # followed by the compengines lookup.
1669 # followed by the compengines lookup.
1670 #
1670 #
1671 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1671 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1672 # compressed chunks. And this matters for changelog and manifest reads.
1672 # compressed chunks. And this matters for changelog and manifest reads.
1673 t = data[0:1]
1673 t = data[0:1]
1674
1674
1675 if t == 'x':
1675 if t == 'x':
1676 try:
1676 try:
1677 return _zlibdecompress(data)
1677 return _zlibdecompress(data)
1678 except zlib.error as e:
1678 except zlib.error as e:
1679 raise RevlogError(_('revlog decompress error: %s') % str(e))
1679 raise RevlogError(_('revlog decompress error: %s') % str(e))
1680 # '\0' is more common than 'u' so it goes first.
1680 # '\0' is more common than 'u' so it goes first.
1681 elif t == '\0':
1681 elif t == '\0':
1682 return data
1682 return data
1683 elif t == 'u':
1683 elif t == 'u':
1684 return util.buffer(data, 1)
1684 return util.buffer(data, 1)
1685
1685
1686 try:
1686 try:
1687 compressor = self._decompressors[t]
1687 compressor = self._decompressors[t]
1688 except KeyError:
1688 except KeyError:
1689 try:
1689 try:
1690 engine = util.compengines.forrevlogheader(t)
1690 engine = util.compengines.forrevlogheader(t)
1691 compressor = engine.revlogcompressor()
1691 compressor = engine.revlogcompressor()
1692 self._decompressors[t] = compressor
1692 self._decompressors[t] = compressor
1693 except KeyError:
1693 except KeyError:
1694 raise RevlogError(_('unknown compression type %r') % t)
1694 raise RevlogError(_('unknown compression type %r') % t)
1695
1695
1696 return compressor.decompress(data)
1696 return compressor.decompress(data)
1697
1697
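Editor's note: the header convention that compress()/decompress() implement can be shown standalone. This toy sketch round-trips a chunk with the same 'x'/'u'/'\0' markers, with zlib standing in for the configured engine; it mirrors the convention only and is not the actual compressor API.

    import zlib

    def toychunk(data):
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return comp              # zlib output starts with 'x', self-marking
        if data[0:1] == b'\0':
            return data              # a '\0' lead byte doubles as the header
        return b'u' + data           # otherwise mark as stored-uncompressed

    def toyunchunk(chunk):
        t = chunk[0:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression type %r' % t)

    payload = b'some revision text\n' * 100
    assert toyunchunk(toychunk(payload)) == payload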
1698 def _isgooddelta(self, d, textlen):
1698 def _isgooddelta(self, d, textlen):
1699 """Returns True if the given delta is good. Good means that it is within
1699 """Returns True if the given delta is good. Good means that it is within
1700 the disk span, disk size, and chain length bounds that we know to be
1700 the disk span, disk size, and chain length bounds that we know to be
1701 performant."""
1701 performant."""
1702 if d is None:
1702 if d is None:
1703 return False
1703 return False
1704
1704
1705 # - 'dist' is the distance from the base revision -- bounding it limits
1705 # - 'dist' is the distance from the base revision -- bounding it limits
1706 # the amount of I/O we need to do.
1706 # the amount of I/O we need to do.
1707 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1707 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1708 # to apply -- bounding it limits the amount of CPU we consume.
1708 # to apply -- bounding it limits the amount of CPU we consume.
1709 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1709 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1710
1710
1711 defaultmax = textlen * 4
1711 defaultmax = textlen * 4
1712 maxdist = self._maxdeltachainspan
1712 maxdist = self._maxdeltachainspan
1713 if not maxdist:
1713 if not maxdist:
1714 maxdist = dist # ensure the conditional passes
1714 maxdist = dist # ensure the conditional passes
1715 maxdist = max(maxdist, defaultmax)
1715 maxdist = max(maxdist, defaultmax)
1716 if (dist > maxdist or l > textlen or
1716 if (dist > maxdist or l > textlen or
1717 compresseddeltalen > textlen * 2 or
1717 compresseddeltalen > textlen * 2 or
1718 (self._maxchainlen and chainlen > self._maxchainlen)):
1718 (self._maxchainlen and chainlen > self._maxchainlen)):
1719 return False
1719 return False
1720
1720
1721 return True
1721 return True
1722
1722
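Editor's note: a worked example of these bounds with hypothetical numbers (no maxchainlen configured). Note that when _maxdeltachainspan is unset, the span check is effectively disabled.

    textlen = 1000                                  # uncompressed fulltext size
    dist, l, compresseddeltalen = 3500, 400, 1800   # candidate delta stats
    # with _maxdeltachainspan = 3000 the effective span cap is
    # max(3000, 4 * textlen) = 4000
    maxdist = max(3000, textlen * 4)
    good = (dist <= maxdist                          # 3500 <= 4000
            and l <= textlen                         # 400 <= 1000
            and compresseddeltalen <= textlen * 2)   # 1800 <= 2000
    assert good  # this candidate would be accepted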
1723 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1723 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1724 cachedelta, ifh, dfh, alwayscache=False):
1724 cachedelta, ifh, dfh, alwayscache=False):
1725 """internal function to add revisions to the log
1725 """internal function to add revisions to the log
1726
1726
1727 see addrevision for argument descriptions.
1727 see addrevision for argument descriptions.
1728
1728
1729 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1729 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1730
1730
1731 invariants:
1731 invariants:
1732 - rawtext is optional (can be None); if not set, cachedelta must be set.
1732 - rawtext is optional (can be None); if not set, cachedelta must be set.
1733 if both are set, they must correspond to each other.
1733 if both are set, they must correspond to each other.
1734 """
1734 """
1735 if node == nullid:
1735 if node == nullid:
1736 raise RevlogError(_("%s: attempt to add null revision") %
1736 raise RevlogError(_("%s: attempt to add null revision") %
1737 (self.indexfile))
1737 (self.indexfile))
1738 if node == wdirid:
1738 if node == wdirid:
1739 raise RevlogError(_("%s: attempt to add wdir revision") %
1739 raise RevlogError(_("%s: attempt to add wdir revision") %
1740 (self.indexfile))
1740 (self.indexfile))
1741
1741
1742 btext = [rawtext]
1742 btext = [rawtext]
1743 def buildtext():
1743 def buildtext():
1744 if btext[0] is not None:
1744 if btext[0] is not None:
1745 return btext[0]
1745 return btext[0]
1746 baserev = cachedelta[0]
1746 baserev = cachedelta[0]
1747 delta = cachedelta[1]
1747 delta = cachedelta[1]
1748 # special case deltas which replace entire base; no need to decode
1748 # special case deltas which replace entire base; no need to decode
1749 # base revision. this neatly avoids censored bases, which throw when
1749 # base revision. this neatly avoids censored bases, which throw when
1750 # they're decoded.
1750 # they're decoded.
1751 hlen = struct.calcsize(">lll")
1751 hlen = struct.calcsize(">lll")
1752 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1752 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1753 len(delta) - hlen):
1753 len(delta) - hlen):
1754 btext[0] = delta[hlen:]
1754 btext[0] = delta[hlen:]
1755 else:
1755 else:
1756 if self._inline:
1756 if self._inline:
1757 fh = ifh
1757 fh = ifh
1758 else:
1758 else:
1759 fh = dfh
1759 fh = dfh
1760 basetext = self.revision(baserev, _df=fh, raw=True)
1760 basetext = self.revision(baserev, _df=fh, raw=True)
1761 btext[0] = mdiff.patch(basetext, delta)
1761 btext[0] = mdiff.patch(basetext, delta)
1762
1762
1763 try:
1763 try:
1764 res = self._processflags(btext[0], flags, 'read', raw=True)
1764 res = self._processflags(btext[0], flags, 'read', raw=True)
1765 btext[0], validatehash = res
1765 btext[0], validatehash = res
1766 if validatehash:
1766 if validatehash:
1767 self.checkhash(btext[0], node, p1=p1, p2=p2)
1767 self.checkhash(btext[0], node, p1=p1, p2=p2)
1768 if flags & REVIDX_ISCENSORED:
1768 if flags & REVIDX_ISCENSORED:
1769 raise RevlogError(_('node %s is not censored') % node)
1769 raise RevlogError(_('node %s is not censored') % node)
1770 except CensoredNodeError:
1770 except CensoredNodeError:
1771 # must pass the censored index flag to add censored revisions
1771 # must pass the censored index flag to add censored revisions
1772 if not flags & REVIDX_ISCENSORED:
1772 if not flags & REVIDX_ISCENSORED:
1773 raise
1773 raise
1774 return btext[0]
1774 return btext[0]
1775
1775
1776 def builddelta(rev):
1776 def builddelta(rev):
1777 # can we use the cached delta?
1777 # can we use the cached delta?
1778 if cachedelta and cachedelta[0] == rev:
1778 if cachedelta and cachedelta[0] == rev:
1779 delta = cachedelta[1]
1779 delta = cachedelta[1]
1780 else:
1780 else:
1781 t = buildtext()
1781 t = buildtext()
1782 if self.iscensored(rev):
1782 if self.iscensored(rev):
1783 # deltas based on a censored revision must replace the
1783 # deltas based on a censored revision must replace the
1784 # full content in one patch, so delta works everywhere
1784 # full content in one patch, so delta works everywhere
1785 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1785 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1786 delta = header + t
1786 delta = header + t
1787 else:
1787 else:
1788 if self._inline:
1788 if self._inline:
1789 fh = ifh
1789 fh = ifh
1790 else:
1790 else:
1791 fh = dfh
1791 fh = dfh
1792 ptext = self.revision(rev, _df=fh, raw=True)
1792 ptext = self.revision(rev, _df=fh, raw=True)
1793 delta = mdiff.textdiff(ptext, t)
1793 delta = mdiff.textdiff(ptext, t)
1794 header, data = self.compress(delta)
1794 header, data = self.compress(delta)
1795 deltalen = len(header) + len(data)
1795 deltalen = len(header) + len(data)
1796 chainbase = self.chainbase(rev)
1796 chainbase = self.chainbase(rev)
1797 dist = deltalen + offset - self.start(chainbase)
1797 dist = deltalen + offset - self.start(chainbase)
1798 if self._generaldelta:
1798 if self._generaldelta:
1799 base = rev
1799 base = rev
1800 else:
1800 else:
1801 base = chainbase
1801 base = chainbase
1802 chainlen, compresseddeltalen = self._chaininfo(rev)
1802 chainlen, compresseddeltalen = self._chaininfo(rev)
1803 chainlen += 1
1803 chainlen += 1
1804 compresseddeltalen += deltalen
1804 compresseddeltalen += deltalen
1805 return (dist, deltalen, (header, data), base,
1805 return (dist, deltalen, (header, data), base,
1806 chainbase, chainlen, compresseddeltalen)
1806 chainbase, chainlen, compresseddeltalen)
1807
1807
1808 curr = len(self)
1808 curr = len(self)
1809 prev = curr - 1
1809 prev = curr - 1
1810 offset = self.end(prev)
1810 offset = self.end(prev)
1811 delta = None
1811 delta = None
1812 p1r, p2r = self.rev(p1), self.rev(p2)
1812 p1r, p2r = self.rev(p1), self.rev(p2)
1813
1813
1814 # full versions are inserted when the needed deltas
1814 # full versions are inserted when the needed deltas
1815 # become comparable to the uncompressed text
1815 # become comparable to the uncompressed text
1816 if rawtext is None:
1816 if rawtext is None:
1817 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1817 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1818 cachedelta[1])
1818 cachedelta[1])
1819 else:
1819 else:
1820 textlen = len(rawtext)
1820 textlen = len(rawtext)
1821
1821
1822 # should we try to build a delta?
1822 # should we try to build a delta?
1823 if prev != nullrev and self.storedeltachains:
1823 if prev != nullrev and self.storedeltachains:
1824 tested = set()
1824 tested = set()
1825 # This condition is true most of the time when processing
1825 # This condition is true most of the time when processing
1826 # changegroup data into a generaldelta repo. The only time it
1826 # changegroup data into a generaldelta repo. The only time it
1827 # isn't true is if this is the first revision in a delta chain
1827 # isn't true is if this is the first revision in a delta chain
1828 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1828 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1829 if cachedelta and self._generaldelta and self._lazydeltabase:
1829 if cachedelta and self._generaldelta and self._lazydeltabase:
1830 # Assume what we received from the server is a good choice
1830 # Assume what we received from the server is a good choice
1831 # builddelta will reuse the cached delta
1831 # builddelta will reuse the cached delta
1832 candidatedelta = builddelta(cachedelta[0])
1832 candidatedelta = builddelta(cachedelta[0])
1833 tested.add(cachedelta[0])
1833 tested.add(cachedelta[0])
1834 if self._isgooddelta(candidatedelta, textlen):
1834 if self._isgooddelta(candidatedelta, textlen):
1835 delta = candidatedelta
1835 delta = candidatedelta
1836 if delta is None and self._generaldelta:
1836 if delta is None and self._generaldelta:
1837 # exclude already lazy tested base if any
1837 # exclude already lazy tested base if any
1838 parents = [p for p in (p1r, p2r)
1838 parents = [p for p in (p1r, p2r)
1839 if p != nullrev and p not in tested]
1839 if p != nullrev and p not in tested]
1840 if parents and not self._aggressivemergedeltas:
1840 if parents and not self._aggressivemergedeltas:
1841 # Pick whichever parent is closer to us (to minimize the
1841 # Pick whichever parent is closer to us (to minimize the
1842 # chance of having to build a fulltext).
1842 # chance of having to build a fulltext).
1843 parents = [max(parents)]
1843 parents = [max(parents)]
1844 tested.update(parents)
1844 tested.update(parents)
1845 pdeltas = []
1845 pdeltas = []
1846 for p in parents:
1846 for p in parents:
1847 pd = builddelta(p)
1847 pd = builddelta(p)
1848 if self._isgooddelta(pd, textlen):
1848 if self._isgooddelta(pd, textlen):
1849 pdeltas.append(pd)
1849 pdeltas.append(pd)
1850 if pdeltas:
1850 if pdeltas:
1851 delta = min(pdeltas, key=lambda x: x[1])
1851 delta = min(pdeltas, key=lambda x: x[1])
1852 if delta is None and prev not in tested:
1852 if delta is None and prev not in tested:
1853 # other approaches failed; try against prev to hopefully save us a
1853 # other approaches failed; try against prev to hopefully save us a
1854 # fulltext.
1854 # fulltext.
1855 candidatedelta = builddelta(prev)
1855 candidatedelta = builddelta(prev)
1856 if self._isgooddelta(candidatedelta, textlen):
1856 if self._isgooddelta(candidatedelta, textlen):
1857 delta = candidatedelta
1857 delta = candidatedelta
1858 if delta is not None:
1858 if delta is not None:
1859 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1859 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1860 else:
1860 else:
1861 rawtext = buildtext()
1861 rawtext = buildtext()
1862 data = self.compress(rawtext)
1862 data = self.compress(rawtext)
1863 l = len(data[1]) + len(data[0])
1863 l = len(data[1]) + len(data[0])
1864 base = chainbase = curr
1864 base = chainbase = curr
1865
1865
1866 e = (offset_type(offset, flags), l, textlen,
1866 e = (offset_type(offset, flags), l, textlen,
1867 base, link, p1r, p2r, node)
1867 base, link, p1r, p2r, node)
1868 self.index.insert(-1, e)
1868 self.index.insert(-1, e)
1869 self.nodemap[node] = curr
1869 self.nodemap[node] = curr
1870
1870
1871 entry = self._io.packentry(e, self.node, self.version, curr)
1871 entry = self._io.packentry(e, self.node, self.version, curr)
1872 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1872 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1873
1873
1874 if alwayscache and rawtext is None:
1874 if alwayscache and rawtext is None:
1875 rawtext = buildtext()
1875 rawtext = buildtext()
1876
1876
1877 if type(rawtext) == str: # only accept immutable objects
1877 if type(rawtext) == str: # only accept immutable objects
1878 self._cache = (node, curr, rawtext)
1878 self._cache = (node, curr, rawtext)
1879 self._chainbasecache[curr] = chainbase
1879 self._chainbasecache[curr] = chainbase
1880 return node
1880 return node
1881
1881
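Editor's note: the delta-base candidate order that _addrevision tries, restated as a simplified sketch. The real code additionally weighs both parents when aggressivemergedeltas is set and picks the smallest good parent delta; all names below are illustrative.

    def choosedeltabase(cachedrev, parents, prev, builddelta, isgood):
        # 1) the server-supplied base, 2) the closer parent, 3) prev
        candidates = []
        if cachedrev is not None:
            candidates.append(cachedrev)
        if parents:
            candidates.append(max(parents))  # minimize fulltext rebuilds
        candidates.append(prev)
        tested = set()
        for rev in candidates:
            if rev in tested:
                continue
            tested.add(rev)
            d = builddelta(rev)
            if isgood(d):
                return d
        return None  # caller falls back to storing a full snapshot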
1882 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1882 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1883 # Files opened in a+ mode have inconsistent behavior on various
1883 # Files opened in a+ mode have inconsistent behavior on various
1884 # platforms. Windows requires that a file positioning call be made
1884 # platforms. Windows requires that a file positioning call be made
1885 # when the file handle transitions between reads and writes. See
1885 # when the file handle transitions between reads and writes. See
1886 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1886 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1887 # platforms, Python or the platform itself can be buggy. Some versions
1887 # platforms, Python or the platform itself can be buggy. Some versions
1888 # of Solaris have been observed to not append at the end of the file
1888 # of Solaris have been observed to not append at the end of the file
1889 # if the file was seeked to before the end. See issue4943 for more.
1889 # if the file was seeked to before the end. See issue4943 for more.
1890 #
1890 #
1891 # We work around this issue by inserting a seek() before writing.
1891 # We work around this issue by inserting a seek() before writing.
1892 # Note: This is likely not necessary on Python 3.
1892 # Note: This is likely not necessary on Python 3.
1893 ifh.seek(0, os.SEEK_END)
1893 ifh.seek(0, os.SEEK_END)
1894 if dfh:
1894 if dfh:
1895 dfh.seek(0, os.SEEK_END)
1895 dfh.seek(0, os.SEEK_END)
1896
1896
1897 curr = len(self) - 1
1897 curr = len(self) - 1
1898 if not self._inline:
1898 if not self._inline:
1899 transaction.add(self.datafile, offset)
1899 transaction.add(self.datafile, offset)
1900 transaction.add(self.indexfile, curr * len(entry))
1900 transaction.add(self.indexfile, curr * len(entry))
1901 if data[0]:
1901 if data[0]:
1902 dfh.write(data[0])
1902 dfh.write(data[0])
1903 dfh.write(data[1])
1903 dfh.write(data[1])
1904 ifh.write(entry)
1904 ifh.write(entry)
1905 else:
1905 else:
1906 offset += curr * self._io.size
1906 offset += curr * self._io.size
1907 transaction.add(self.indexfile, offset, curr)
1907 transaction.add(self.indexfile, offset, curr)
1908 ifh.write(entry)
1908 ifh.write(entry)
1909 ifh.write(data[0])
1909 ifh.write(data[0])
1910 ifh.write(data[1])
1910 ifh.write(data[1])
1911 self.checkinlinesize(transaction, ifh)
1911 self.checkinlinesize(transaction, ifh)
1912
1912
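Editor's note: the a+ workaround described in the comment above, in isolation (a sketch):

    import os

    def appendsafely(fh, payload):
        # force the handle to EOF first; some platforms otherwise write at
        # a stale position after a read on the same 'a+' handle
        fh.seek(0, os.SEEK_END)
        fh.write(payload)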
1913 def addgroup(self, deltas, transaction, addrevisioncb=None):
1913 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1914 """
1914 """
1915 add a delta group
1915 add a delta group
1916
1916
1917 given a set of deltas, add them to the revision log. the
1917 given a set of deltas, add them to the revision log. the
1918 first delta is against its parent, which should be in our
1918 first delta is against its parent, which should be in our
1919 log, the rest are against the previous delta.
1919 log, the rest are against the previous delta.
1920
1920
1921 If ``addrevisioncb`` is defined, it will be called with arguments of
1921 If ``addrevisioncb`` is defined, it will be called with arguments of
1922 this revlog and the node that was added.
1922 this revlog and the node that was added.
1923 """
1923 """
1924
1924
1925 nodes = []
1925 nodes = []
1926
1926
1927 r = len(self)
1927 r = len(self)
1928 end = 0
1928 end = 0
1929 if r:
1929 if r:
1930 end = self.end(r - 1)
1930 end = self.end(r - 1)
1931 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1931 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1932 isize = r * self._io.size
1932 isize = r * self._io.size
1933 if self._inline:
1933 if self._inline:
1934 transaction.add(self.indexfile, end + isize, r)
1934 transaction.add(self.indexfile, end + isize, r)
1935 dfh = None
1935 dfh = None
1936 else:
1936 else:
1937 transaction.add(self.indexfile, isize, r)
1937 transaction.add(self.indexfile, isize, r)
1938 transaction.add(self.datafile, end)
1938 transaction.add(self.datafile, end)
1939 dfh = self.opener(self.datafile, "a+")
1939 dfh = self.opener(self.datafile, "a+")
1940 def flush():
1940 def flush():
1941 if dfh:
1941 if dfh:
1942 dfh.flush()
1942 dfh.flush()
1943 ifh.flush()
1943 ifh.flush()
1944 try:
1944 try:
1945 # loop through our set of deltas
1945 # loop through our set of deltas
1946 for data in deltas:
1946 for data in deltas:
1947 node, p1, p2, link, deltabase, delta, flags = data
1947 node, p1, p2, linknode, deltabase, delta, flags = data
1948 link = linkmapper(linknode)
1948 flags = flags or REVIDX_DEFAULT_FLAGS
1949 flags = flags or REVIDX_DEFAULT_FLAGS
1949
1950
1950 nodes.append(node)
1951 nodes.append(node)
1951
1952
1952 if node in self.nodemap:
1953 if node in self.nodemap:
1953 # this can happen if two branches make the same change
1954 # this can happen if two branches make the same change
1954 continue
1955 continue
1955
1956
1956 for p in (p1, p2):
1957 for p in (p1, p2):
1957 if p not in self.nodemap:
1958 if p not in self.nodemap:
1958 raise LookupError(p, self.indexfile,
1959 raise LookupError(p, self.indexfile,
1959 _('unknown parent'))
1960 _('unknown parent'))
1960
1961
1961 if deltabase not in self.nodemap:
1962 if deltabase not in self.nodemap:
1962 raise LookupError(deltabase, self.indexfile,
1963 raise LookupError(deltabase, self.indexfile,
1963 _('unknown delta base'))
1964 _('unknown delta base'))
1964
1965
1965 baserev = self.rev(deltabase)
1966 baserev = self.rev(deltabase)
1966
1967
1967 if baserev != nullrev and self.iscensored(baserev):
1968 if baserev != nullrev and self.iscensored(baserev):
1968 # if base is censored, delta must be full replacement in a
1969 # if base is censored, delta must be full replacement in a
1969 # single patch operation
1970 # single patch operation
1970 hlen = struct.calcsize(">lll")
1971 hlen = struct.calcsize(">lll")
1971 oldlen = self.rawsize(baserev)
1972 oldlen = self.rawsize(baserev)
1972 newlen = len(delta) - hlen
1973 newlen = len(delta) - hlen
1973 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1974 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1974 raise error.CensoredBaseError(self.indexfile,
1975 raise error.CensoredBaseError(self.indexfile,
1975 self.node(baserev))
1976 self.node(baserev))
1976
1977
1977 if not flags and self._peek_iscensored(baserev, delta, flush):
1978 if not flags and self._peek_iscensored(baserev, delta, flush):
1978 flags |= REVIDX_ISCENSORED
1979 flags |= REVIDX_ISCENSORED
1979
1980
1980 # We assume consumers of addrevisioncb will want to retrieve
1981 # We assume consumers of addrevisioncb will want to retrieve
1981 # the added revision, which will require a call to
1982 # the added revision, which will require a call to
1982 # revision(). revision() will fast path if there is a cache
1983 # revision(). revision() will fast path if there is a cache
1983 # hit. So, we tell _addrevision() to always cache in this case.
1984 # hit. So, we tell _addrevision() to always cache in this case.
1984 # We're only using addgroup() in the context of changegroup
1985 # We're only using addgroup() in the context of changegroup
1985 # generation so the revision data can always be handled as raw
1986 # generation so the revision data can always be handled as raw
1986 # by the flagprocessor.
1987 # by the flagprocessor.
1987 self._addrevision(node, None, transaction, link,
1988 self._addrevision(node, None, transaction, link,
1988 p1, p2, flags, (baserev, delta),
1989 p1, p2, flags, (baserev, delta),
1989 ifh, dfh,
1990 ifh, dfh,
1990 alwayscache=bool(addrevisioncb))
1991 alwayscache=bool(addrevisioncb))
1991
1992
1992 if addrevisioncb:
1993 if addrevisioncb:
1993 addrevisioncb(self, node)
1994 addrevisioncb(self, node)
1994
1995
1995 if not dfh and not self._inline:
1996 if not dfh and not self._inline:
1996 # addrevision switched from inline to conventional
1997 # addrevision switched from inline to conventional
1997 # reopen the index
1998 # reopen the index
1998 ifh.close()
1999 ifh.close()
1999 dfh = self.opener(self.datafile, "a+")
2000 dfh = self.opener(self.datafile, "a+")
2000 ifh = self.opener(self.indexfile, "a+",
2001 ifh = self.opener(self.indexfile, "a+",
2001 checkambig=self._checkambig)
2002 checkambig=self._checkambig)
2002 finally:
2003 finally:
2003 if dfh:
2004 if dfh:
2004 dfh.close()
2005 dfh.close()
2005 ifh.close()
2006 ifh.close()
2006
2007
2007 return nodes
2008 return nodes
2008
2009
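Editor's note: this hunk is the heart of the commit. The caller now passes a linkmapper into addgroup() and each delta tuple carries the raw linknode, which the revlog resolves itself, as the updated test below shows. A hedged sketch of the calling convention, where cg, cl, flog and tr are hypothetical changegroup, changelog, filelog and transaction objects:

    def linkmap(linknode):
        # resolve the changeset node carried in the delta to a local linkrev
        return cl.rev(linknode)

    # each item: (node, p1, p2, linknode, deltabase, delta, flags)
    deltas = cg.deltaiter()
    added = flog.addgroup(deltas, linkmap, tr)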
2009 def iscensored(self, rev):
2010 def iscensored(self, rev):
2010 """Check if a file revision is censored."""
2011 """Check if a file revision is censored."""
2011 return False
2012 return False
2012
2013
2013 def _peek_iscensored(self, baserev, delta, flush):
2014 def _peek_iscensored(self, baserev, delta, flush):
2014 """Quickly check if a delta produces a censored revision."""
2015 """Quickly check if a delta produces a censored revision."""
2015 return False
2016 return False
2016
2017
2017 def getstrippoint(self, minlink):
2018 def getstrippoint(self, minlink):
2018 """find the minimum rev that must be stripped to strip the linkrev
2019 """find the minimum rev that must be stripped to strip the linkrev
2019
2020
2020 Returns a tuple containing the minimum rev and a set of all revs that
2021 Returns a tuple containing the minimum rev and a set of all revs that
2021 have linkrevs that will be broken by this strip.
2022 have linkrevs that will be broken by this strip.
2022 """
2023 """
2023 brokenrevs = set()
2024 brokenrevs = set()
2024 strippoint = len(self)
2025 strippoint = len(self)
2025
2026
2026 heads = {}
2027 heads = {}
2027 futurelargelinkrevs = set()
2028 futurelargelinkrevs = set()
2028 for head in self.headrevs():
2029 for head in self.headrevs():
2029 headlinkrev = self.linkrev(head)
2030 headlinkrev = self.linkrev(head)
2030 heads[head] = headlinkrev
2031 heads[head] = headlinkrev
2031 if headlinkrev >= minlink:
2032 if headlinkrev >= minlink:
2032 futurelargelinkrevs.add(headlinkrev)
2033 futurelargelinkrevs.add(headlinkrev)
2033
2034
2034 # This algorithm involves walking down the rev graph, starting at the
2035 # This algorithm involves walking down the rev graph, starting at the
2035 # heads. Since the revs are topologically sorted according to linkrev,
2036 # heads. Since the revs are topologically sorted according to linkrev,
2036 # once all head linkrevs are below the minlink, we know there are
2037 # once all head linkrevs are below the minlink, we know there are
2037 # no more revs that could have a linkrev greater than minlink.
2038 # no more revs that could have a linkrev greater than minlink.
2038 # So we can stop walking.
2039 # So we can stop walking.
2039 while futurelargelinkrevs:
2040 while futurelargelinkrevs:
2040 strippoint -= 1
2041 strippoint -= 1
2041 linkrev = heads.pop(strippoint)
2042 linkrev = heads.pop(strippoint)
2042
2043
2043 if linkrev < minlink:
2044 if linkrev < minlink:
2044 brokenrevs.add(strippoint)
2045 brokenrevs.add(strippoint)
2045 else:
2046 else:
2046 futurelargelinkrevs.remove(linkrev)
2047 futurelargelinkrevs.remove(linkrev)
2047
2048
2048 for p in self.parentrevs(strippoint):
2049 for p in self.parentrevs(strippoint):
2049 if p != nullrev:
2050 if p != nullrev:
2050 plinkrev = self.linkrev(p)
2051 plinkrev = self.linkrev(p)
2051 heads[p] = plinkrev
2052 heads[p] = plinkrev
2052 if plinkrev >= minlink:
2053 if plinkrev >= minlink:
2053 futurelargelinkrevs.add(plinkrev)
2054 futurelargelinkrevs.add(plinkrev)
2054
2055
2055 return strippoint, brokenrevs
2056 return strippoint, brokenrevs
2056
2057
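Editor's note: how this is consumed (a sketch): strip() below calls getstrippoint() itself; per strip()'s docstring, a caller is expected to save the revisions that should survive (including the reported broken ones) and re-add them after the truncation.

    rev, brokenrevs = rlog.getstrippoint(minlink)
    # ... caller bundles up brokenrevs and anything at or above rev ...
    rlog.strip(minlink, tr)  # truncates everything from rev onwards
    # ... caller re-applies the saved revisions ...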
2057 def strip(self, minlink, transaction):
2058 def strip(self, minlink, transaction):
2058 """truncate the revlog on the first revision with a linkrev >= minlink
2059 """truncate the revlog on the first revision with a linkrev >= minlink
2059
2060
2060 This function is called when we're stripping revision minlink and
2061 This function is called when we're stripping revision minlink and
2061 its descendants from the repository.
2062 its descendants from the repository.
2062
2063
2063 We have to remove all revisions with linkrev >= minlink, because
2064 We have to remove all revisions with linkrev >= minlink, because
2064 the equivalent changelog revisions will be renumbered after the
2065 the equivalent changelog revisions will be renumbered after the
2065 strip.
2066 strip.
2066
2067
2067 So we truncate the revlog on the first of these revisions, and
2068 So we truncate the revlog on the first of these revisions, and
2068 trust that the caller has saved the revisions that shouldn't be
2069 trust that the caller has saved the revisions that shouldn't be
2069 removed and that it'll re-add them after this truncation.
2070 removed and that it'll re-add them after this truncation.
2070 """
2071 """
2071 if len(self) == 0:
2072 if len(self) == 0:
2072 return
2073 return
2073
2074
2074 rev, _ = self.getstrippoint(minlink)
2075 rev, _ = self.getstrippoint(minlink)
2075 if rev == len(self):
2076 if rev == len(self):
2076 return
2077 return
2077
2078
2078 # first truncate the files on disk
2079 # first truncate the files on disk
2079 end = self.start(rev)
2080 end = self.start(rev)
2080 if not self._inline:
2081 if not self._inline:
2081 transaction.add(self.datafile, end)
2082 transaction.add(self.datafile, end)
2082 end = rev * self._io.size
2083 end = rev * self._io.size
2083 else:
2084 else:
2084 end += rev * self._io.size
2085 end += rev * self._io.size
2085
2086
2086 transaction.add(self.indexfile, end)
2087 transaction.add(self.indexfile, end)
2087
2088
2088 # then reset internal state in memory to forget those revisions
2089 # then reset internal state in memory to forget those revisions
2089 self._cache = None
2090 self._cache = None
2090 self._chaininfocache = {}
2091 self._chaininfocache = {}
2091 self._chunkclear()
2092 self._chunkclear()
2092 for x in xrange(rev, len(self)):
2093 for x in xrange(rev, len(self)):
2093 del self.nodemap[self.node(x)]
2094 del self.nodemap[self.node(x)]
2094
2095
2095 del self.index[rev:-1]
2096 del self.index[rev:-1]
2096
2097
2097 def checksize(self):
2098 def checksize(self):
2098 expected = 0
2099 expected = 0
2099 if len(self):
2100 if len(self):
2100 expected = max(0, self.end(len(self) - 1))
2101 expected = max(0, self.end(len(self) - 1))
2101
2102
2102 try:
2103 try:
2103 f = self.opener(self.datafile)
2104 f = self.opener(self.datafile)
2104 f.seek(0, 2)
2105 f.seek(0, 2)
2105 actual = f.tell()
2106 actual = f.tell()
2106 f.close()
2107 f.close()
2107 dd = actual - expected
2108 dd = actual - expected
2108 except IOError as inst:
2109 except IOError as inst:
2109 if inst.errno != errno.ENOENT:
2110 if inst.errno != errno.ENOENT:
2110 raise
2111 raise
2111 dd = 0
2112 dd = 0
2112
2113
2113 try:
2114 try:
2114 f = self.opener(self.indexfile)
2115 f = self.opener(self.indexfile)
2115 f.seek(0, 2)
2116 f.seek(0, 2)
2116 actual = f.tell()
2117 actual = f.tell()
2117 f.close()
2118 f.close()
2118 s = self._io.size
2119 s = self._io.size
2119 i = max(0, actual // s)
2120 i = max(0, actual // s)
2120 di = actual - (i * s)
2121 di = actual - (i * s)
2121 if self._inline:
2122 if self._inline:
2122 databytes = 0
2123 databytes = 0
2123 for r in self:
2124 for r in self:
2124 databytes += max(0, self.length(r))
2125 databytes += max(0, self.length(r))
2125 dd = 0
2126 dd = 0
2126 di = actual - len(self) * s - databytes
2127 di = actual - len(self) * s - databytes
2127 except IOError as inst:
2128 except IOError as inst:
2128 if inst.errno != errno.ENOENT:
2129 if inst.errno != errno.ENOENT:
2129 raise
2130 raise
2130 di = 0
2131 di = 0
2131
2132
2132 return (dd, di)
2133 return (dd, di)
2133
2134
2134 def files(self):
2135 def files(self):
2135 res = [self.indexfile]
2136 res = [self.indexfile]
2136 if not self._inline:
2137 if not self._inline:
2137 res.append(self.datafile)
2138 res.append(self.datafile)
2138 return res
2139 return res
2139
2140
2140 DELTAREUSEALWAYS = 'always'
2141 DELTAREUSEALWAYS = 'always'
2141 DELTAREUSESAMEREVS = 'samerevs'
2142 DELTAREUSESAMEREVS = 'samerevs'
2142 DELTAREUSENEVER = 'never'
2143 DELTAREUSENEVER = 'never'
2143
2144
2144 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2145 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2145
2146
2146 def clone(self, tr, destrevlog, addrevisioncb=None,
2147 def clone(self, tr, destrevlog, addrevisioncb=None,
2147 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2148 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2148 """Copy this revlog to another, possibly with format changes.
2149 """Copy this revlog to another, possibly with format changes.
2149
2150
2150 The destination revlog will contain the same revisions and nodes.
2151 The destination revlog will contain the same revisions and nodes.
2151 However, it may not be bit-for-bit identical due to e.g. delta encoding
2152 However, it may not be bit-for-bit identical due to e.g. delta encoding
2152 differences.
2153 differences.
2153
2154
2154 The ``deltareuse`` argument controls how deltas from the existing revlog
2155 The ``deltareuse`` argument controls how deltas from the existing revlog
2155 are preserved in the destination revlog. The argument can have the
2156 are preserved in the destination revlog. The argument can have the
2156 following values:
2157 following values:
2157
2158
2158 DELTAREUSEALWAYS
2159 DELTAREUSEALWAYS
2159 Deltas will always be reused (if possible), even if the destination
2160 Deltas will always be reused (if possible), even if the destination
2160 revlog would not select the same revisions for the delta. This is the
2161 revlog would not select the same revisions for the delta. This is the
2161 fastest mode of operation.
2162 fastest mode of operation.
2162 DELTAREUSESAMEREVS
2163 DELTAREUSESAMEREVS
2163 Deltas will be reused if the destination revlog would pick the same
2164 Deltas will be reused if the destination revlog would pick the same
2164 revisions for the delta. This mode strikes a balance between speed
2165 revisions for the delta. This mode strikes a balance between speed
2165 and optimization.
2166 and optimization.
2166 DELTAREUSENEVER
2167 DELTAREUSENEVER
2167 Deltas will never be reused. This is the slowest mode of execution.
2168 Deltas will never be reused. This is the slowest mode of execution.
2168 This mode can be used to recompute deltas (e.g. if the diff/delta
2169 This mode can be used to recompute deltas (e.g. if the diff/delta
2169 algorithm changes).
2170 algorithm changes).
2170
2171
2171 Delta computation can be slow, so the choice of delta reuse policy can
2172 Delta computation can be slow, so the choice of delta reuse policy can
2172 significantly affect run time.
2173 significantly affect run time.
2173
2174
2174 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2175 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2175 two extremes. Deltas will be reused if they are appropriate. But if a
2176 two extremes. Deltas will be reused if they are appropriate. But if a
2176 better delta base is available, it will be used. This means if you
2177 better delta base is available, it will be used. This means if you
2177 are converting a non-generaldelta revlog to a generaldelta revlog,
2178 are converting a non-generaldelta revlog to a generaldelta revlog,
2178 deltas will be recomputed if the delta's parent isn't a parent of the
2179 deltas will be recomputed if the delta's parent isn't a parent of the
2179 revision.
2180 revision.
2180
2181
2181 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2182 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2182 controls whether to compute deltas against both parents for merges.
2183 controls whether to compute deltas against both parents for merges.
2183 If not set, the current default is used.
2184 If not set, the current default is used.
2184 """
2185 """
2185 if deltareuse not in self.DELTAREUSEALL:
2186 if deltareuse not in self.DELTAREUSEALL:
2186 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2187 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2187
2188
2188 if len(destrevlog):
2189 if len(destrevlog):
2189 raise ValueError(_('destination revlog is not empty'))
2190 raise ValueError(_('destination revlog is not empty'))
2190
2191
2191 if getattr(self, 'filteredrevs', None):
2192 if getattr(self, 'filteredrevs', None):
2192 raise ValueError(_('source revlog has filtered revisions'))
2193 raise ValueError(_('source revlog has filtered revisions'))
2193 if getattr(destrevlog, 'filteredrevs', None):
2194 if getattr(destrevlog, 'filteredrevs', None):
2194 raise ValueError(_('destination revlog has filtered revisions'))
2195 raise ValueError(_('destination revlog has filtered revisions'))
2195
2196
2196 # lazydeltabase controls whether to reuse a cached delta, if possible.
2197 # lazydeltabase controls whether to reuse a cached delta, if possible.
2197 oldlazydeltabase = destrevlog._lazydeltabase
2198 oldlazydeltabase = destrevlog._lazydeltabase
2198 oldamd = destrevlog._aggressivemergedeltas
2199 oldamd = destrevlog._aggressivemergedeltas
2199
2200
2200 try:
2201 try:
2201 if deltareuse == self.DELTAREUSEALWAYS:
2202 if deltareuse == self.DELTAREUSEALWAYS:
2202 destrevlog._lazydeltabase = True
2203 destrevlog._lazydeltabase = True
2203 elif deltareuse == self.DELTAREUSESAMEREVS:
2204 elif deltareuse == self.DELTAREUSESAMEREVS:
2204 destrevlog._lazydeltabase = False
2205 destrevlog._lazydeltabase = False
2205
2206
2206 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2207 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2207
2208
2208 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2209 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2209 self.DELTAREUSESAMEREVS)
2210 self.DELTAREUSESAMEREVS)
2210
2211
2211 index = self.index
2212 index = self.index
2212 for rev in self:
2213 for rev in self:
2213 entry = index[rev]
2214 entry = index[rev]
2214
2215
2215 # Some classes override linkrev to take filtered revs into
2216 # Some classes override linkrev to take filtered revs into
2216 # account. Use raw entry from index.
2217 # account. Use raw entry from index.
2217 flags = entry[0] & 0xffff
2218 flags = entry[0] & 0xffff
2218 linkrev = entry[4]
2219 linkrev = entry[4]
2219 p1 = index[entry[5]][7]
2220 p1 = index[entry[5]][7]
2220 p2 = index[entry[6]][7]
2221 p2 = index[entry[6]][7]
2221 node = entry[7]
2222 node = entry[7]
2222
2223
2223 # (Possibly) reuse the delta from the revlog if allowed and
2224 # (Possibly) reuse the delta from the revlog if allowed and
2224 # the revlog chunk is a delta.
2225 # the revlog chunk is a delta.
2225 cachedelta = None
2226 cachedelta = None
2226 rawtext = None
2227 rawtext = None
2227 if populatecachedelta:
2228 if populatecachedelta:
2228 dp = self.deltaparent(rev)
2229 dp = self.deltaparent(rev)
2229 if dp != nullrev:
2230 if dp != nullrev:
2230 cachedelta = (dp, str(self._chunk(rev)))
2231 cachedelta = (dp, str(self._chunk(rev)))
2231
2232
2232 if not cachedelta:
2233 if not cachedelta:
2233 rawtext = self.revision(rev, raw=True)
2234 rawtext = self.revision(rev, raw=True)
2234
2235
2235 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2236 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2236 checkambig=False)
2237 checkambig=False)
2237 dfh = None
2238 dfh = None
2238 if not destrevlog._inline:
2239 if not destrevlog._inline:
2239 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2240 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2240 try:
2241 try:
2241 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2242 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2242 flags, cachedelta, ifh, dfh)
2243 flags, cachedelta, ifh, dfh)
2243 finally:
2244 finally:
2244 if dfh:
2245 if dfh:
2245 dfh.close()
2246 dfh.close()
2246 ifh.close()
2247 ifh.close()
2247
2248
2248 if addrevisioncb:
2249 if addrevisioncb:
2249 addrevisioncb(self, rev, node)
2250 addrevisioncb(self, rev, node)
2250 finally:
2251 finally:
2251 destrevlog._lazydeltabase = oldlazydeltabase
2252 destrevlog._lazydeltabase = oldlazydeltabase
2252 destrevlog._aggressivemergedeltas = oldamd
2253 destrevlog._aggressivemergedeltas = oldamd
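Editor's note: a usage sketch for clone(), with the destination opener, source revlog and transaction assumed to exist: force every delta to be recomputed while copying, e.g. after a delta algorithm change.

    dest = revlog.revlog(destopener, src.indexfile)  # must start empty
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=True)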
@@ -1,316 +1,315 b''
1 # test revlog interaction about raw data (flagprocessor)
1 # test revlog interaction about raw data (flagprocessor)
2
2
3 from __future__ import absolute_import, print_function
3 from __future__ import absolute_import, print_function
4
4
5 import sys
5 import sys
6
6
7 from mercurial import (
7 from mercurial import (
8 encoding,
8 encoding,
9 node,
9 node,
10 revlog,
10 revlog,
11 transaction,
11 transaction,
12 vfs,
12 vfs,
13 )
13 )
14
14
15 # TESTTMP is optional. This makes it convenient to run without run-tests.py
15 # TESTTMP is optional. This makes it convenient to run without run-tests.py
16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp'))
16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp'))
17
17
18 # Enable generaldelta otherwise revlog won't use delta as expected by the test
18 # Enable generaldelta otherwise revlog won't use delta as expected by the test
19 tvfs.options = {'generaldelta': True, 'revlogv1': True}
19 tvfs.options = {'generaldelta': True, 'revlogv1': True}
20
20
21 # The test wants to control whether to use delta explicitly, based on
21 # The test wants to control whether to use delta explicitly, based on
22 # "storedeltachains".
22 # "storedeltachains".
23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains
23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains
24
24
25 def abort(msg):
25 def abort(msg):
26 print('abort: %s' % msg)
26 print('abort: %s' % msg)
27 # Return 0 so run-tests.py could compare the output.
27 # Return 0 so run-tests.py could compare the output.
28 sys.exit()
28 sys.exit()
29
29
30 # Register a revlog processor for flag EXTSTORED.
30 # Register a revlog processor for flag EXTSTORED.
31 #
31 #
32 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
32 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
33 # insertion and replacement, and may be interesting to test revlog's line-based
33 # insertion and replacement, and may be interesting to test revlog's line-based
34 # deltas.
34 # deltas.
35 _extheader = b'E\n'
35 _extheader = b'E\n'
36
36
37 def readprocessor(self, rawtext):
37 def readprocessor(self, rawtext):
38 # True: the returned text could be used to verify hash
38 # True: the returned text could be used to verify hash
39 text = rawtext[len(_extheader):].replace(b'i', b'1')
39 text = rawtext[len(_extheader):].replace(b'i', b'1')
40 return text, True
40 return text, True
41
41
42 def writeprocessor(self, text):
42 def writeprocessor(self, text):
43 # False: the returned rawtext shouldn't be used to verify hash
43 # False: the returned rawtext shouldn't be used to verify hash
44 rawtext = _extheader + text.replace(b'1', b'i')
44 rawtext = _extheader + text.replace(b'1', b'i')
45 return rawtext, False
45 return rawtext, False
46
46
47 def rawprocessor(self, rawtext):
47 def rawprocessor(self, rawtext):
48 # False: do not verify hash. Only the content returned by "readprocessor"
48 # False: do not verify hash. Only the content returned by "readprocessor"
49 # can be used to verify hash.
49 # can be used to verify hash.
50 return False
50 return False
51
51
52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
53 (readprocessor, writeprocessor, rawprocessor))
53 (readprocessor, writeprocessor, rawprocessor))
54
54
55 # Utilities about reading and appending revlog
55 # Utilities about reading and appending revlog
56
56
57 def newtransaction():
57 def newtransaction():
58 # A transaction is required to write revlogs
58 # A transaction is required to write revlogs
59 report = lambda msg: None
59 report = lambda msg: None
60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
61
61
62 def newrevlog(name=b'_testrevlog.i', recreate=False):
62 def newrevlog(name=b'_testrevlog.i', recreate=False):
63 if recreate:
63 if recreate:
64 tvfs.tryunlink(name)
64 tvfs.tryunlink(name)
65 rlog = revlog.revlog(tvfs, name)
65 rlog = revlog.revlog(tvfs, name)
66 return rlog
66 return rlog
67
67
68 def appendrev(rlog, text, tr, isext=False, isdelta=True):
68 def appendrev(rlog, text, tr, isext=False, isdelta=True):
69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag
69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag
70 processor will be used (and rawtext is different from text). If isdelta is
70 processor will be used (and rawtext is different from text). If isdelta is
71 True, force the revision to be a delta, otherwise it's full text.
71 True, force the revision to be a delta, otherwise it's full text.
72 '''
72 '''
73 nextrev = len(rlog)
73 nextrev = len(rlog)
74 p1 = rlog.node(nextrev - 1)
74 p1 = rlog.node(nextrev - 1)
75 p2 = node.nullid
75 p2 = node.nullid
76 if isext:
76 if isext:
77 flags = revlog.REVIDX_EXTSTORED
77 flags = revlog.REVIDX_EXTSTORED
78 else:
78 else:
79 flags = revlog.REVIDX_DEFAULT_FLAGS
79 flags = revlog.REVIDX_DEFAULT_FLAGS
80 # Change storedeltachains temporarily, to override revlog's delta decision
80 # Change storedeltachains temporarily, to override revlog's delta decision
81 rlog.storedeltachains = isdelta
81 rlog.storedeltachains = isdelta
82 try:
82 try:
83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
84 return nextrev
84 return nextrev
85 except Exception as ex:
85 except Exception as ex:
86 abort('rev %d: failed to append: %s' % (nextrev, ex))
86 abort('rev %d: failed to append: %s' % (nextrev, ex))
87 finally:
87 finally:
88 # Restore storedeltachains. It is always True, see revlog.__init__
88 # Restore storedeltachains. It is always True, see revlog.__init__
89 rlog.storedeltachains = True
89 rlog.storedeltachains = True
90
90
91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog.
92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog.
93
93
94 This emulates push or pull. They use changegroup. Changegroup requires a
94 This emulates push or pull. They use changegroup. Changegroup requires a
95 repo to work. We don't have a repo, so a dummy changegroup is used.
95 repo to work. We don't have a repo, so a dummy changegroup is used.
96
96
97 If optimaldelta is True, use optimized delta parent, so the destination
97 If optimaldelta is True, use optimized delta parent, so the destination
98 revlog could probably reuse it. Otherwise it builds sub-optimal delta, and
98 revlog could probably reuse it. Otherwise it builds sub-optimal delta, and
99 the destination revlog needs more work to use it.
99 the destination revlog needs more work to use it.
100
100
101 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
101 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
102 code path, which is not covered by "appendrev" alone.
102 code path, which is not covered by "appendrev" alone.
103 '''
103 '''
104 class dummychangegroup(object):
104 class dummychangegroup(object):
105 @staticmethod
105 @staticmethod
106 def deltachunk(pnode):
106 def deltachunk(pnode):
107 pnode = pnode or node.nullid
107 pnode = pnode or node.nullid
108 parentrev = rlog.rev(pnode)
108 parentrev = rlog.rev(pnode)
109 r = parentrev + 1
109 r = parentrev + 1
110 if r >= len(rlog):
110 if r >= len(rlog):
111 return {}
111 return {}
112 if optimaldelta:
112 if optimaldelta:
113 deltaparent = parentrev
113 deltaparent = parentrev
114 else:
114 else:
115 # suboptimal deltaparent
115 # suboptimal deltaparent
116 deltaparent = min(0, parentrev)
116 deltaparent = min(0, parentrev)
117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid,
117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid,
118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r),
118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r),
119 'deltabase': rlog.node(deltaparent),
119 'deltabase': rlog.node(deltaparent),
120 'delta': rlog.revdiff(deltaparent, r)}
120 'delta': rlog.revdiff(deltaparent, r)}
121
121
122 def deltaiter(self, linkmapper):
122 def deltaiter(self):
123 chain = None
123 chain = None
124 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
124 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
125 node = chunkdata['node']
125 node = chunkdata['node']
126 p1 = chunkdata['p1']
126 p1 = chunkdata['p1']
127 p2 = chunkdata['p2']
127 p2 = chunkdata['p2']
128 cs = chunkdata['cs']
128 cs = chunkdata['cs']
129 deltabase = chunkdata['deltabase']
129 deltabase = chunkdata['deltabase']
130 delta = chunkdata['delta']
130 delta = chunkdata['delta']
131 flags = chunkdata['flags']
131 flags = chunkdata['flags']
132
132
133 link = linkmapper(cs)
134 chain = node
133 chain = node
135
134
136 yield (node, p1, p2, link, deltabase, delta, flags)
135 yield (node, p1, p2, cs, deltabase, delta, flags)
137
136
138 def linkmap(lnode):
137 def linkmap(lnode):
139 return rlog.rev(lnode)
138 return rlog.rev(lnode)
140
139
141 dlog = newrevlog(destname, recreate=True)
140 dlog = newrevlog(destname, recreate=True)
142 dummydeltas = dummychangegroup().deltaiter(linkmap)
141 dummydeltas = dummychangegroup().deltaiter()
143 dlog.addgroup(dummydeltas, tr)
142 dlog.addgroup(dummydeltas, linkmap, tr)
144 return dlog
143 return dlog
145
144
146 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
145 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
147 '''Like addgroupcopy, but use the low level revlog._addrevision directly.
146 '''Like addgroupcopy, but use the low level revlog._addrevision directly.
148
147
149 It exercises some code paths that are hard to reach otherwise.
148 It exercises some code paths that are hard to reach otherwise.
150 '''
149 '''
151 dlog = newrevlog(destname, recreate=True)
150 dlog = newrevlog(destname, recreate=True)
152 for r in rlog:
151 for r in rlog:
153 p1 = rlog.node(r - 1)
152 p1 = rlog.node(r - 1)
154 p2 = node.nullid
153 p2 = node.nullid
155 if r == 0:
154 if r == 0:
156 text = rlog.revision(r, raw=True)
155 text = rlog.revision(r, raw=True)
157 cachedelta = None
156 cachedelta = None
158 else:
157 else:
159 # deltaparent is more interesting if it has the EXTSTORED flag.
158 # deltaparent is more interesting if it has the EXTSTORED flag.
160 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)])
159 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)])
161 text = None
160 text = None
162 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
161 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
163 flags = rlog.flags(r)
162 flags = rlog.flags(r)
164 ifh = dfh = None
163 ifh = dfh = None
165 try:
164 try:
166 ifh = dlog.opener(dlog.indexfile, 'a+')
165 ifh = dlog.opener(dlog.indexfile, 'a+')
167 if not dlog._inline:
166 if not dlog._inline:
168 dfh = dlog.opener(dlog.datafile, 'a+')
167 dfh = dlog.opener(dlog.datafile, 'a+')
169 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags,
168 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags,
170 cachedelta, ifh, dfh)
169 cachedelta, ifh, dfh)
171 finally:
170 finally:
172 if dfh is not None:
171 if dfh is not None:
173 dfh.close()
172 dfh.close()
174 if ifh is not None:
173 if ifh is not None:
175 ifh.close()
174 ifh.close()
176 return dlog
175 return dlog

# Utilities to generate revisions for testing

def genbits(n):
    '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n),
    i.e. the generated numbers have a width of n bits.

    The combination of two adjacent numbers will cover all possible cases.
    That is to say, given any x, y where both x and y are in range(2 ** n),
    there is an x followed immediately by y in the generated sequence.
    '''
    m = 2 ** n

    # Gray code. See https://en.wikipedia.org/wiki/Gray_code
    gray = lambda x: x ^ (x >> 1)
    reversegray = dict((gray(i), i) for i in range(m))

    # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
    # the next unused gray code whose higher n bits equal X.

    # For gray codes whose higher bits are X, a[X] of them have been used.
    a = [0] * m

    # Iterate from 0.
    x = 0
    yield x
    for i in range(m * m):
        x = reversegray[x]
        y = gray(a[x] + x * m) & (m - 1)
        assert a[x] < m
        a[x] += 1
        x = y
        yield x
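
# A minimal illustration of the coverage property (added commentary, not part
# of the original test): genbits(1) yields 2 ** (1 * 2) + 1 = 5 numbers,
#
#     list(genbits(1)) == [0, 0, 1, 1, 0]
#
# and the adjacent pairs (0, 0), (0, 1), (1, 1), (1, 0) cover all of
# range(2) x range(2), each exactly once.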

def gentext(rev):
    '''Given a revision number, generate dummy text'''
    return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
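
# For example (illustrative values derived from the definition above):
#
#     gentext(0) == b'-1\n'
#     gentext(2) == b'-1\n0\n1\n'
#     gentext(5) == b'-1\n'      # rev % 5 wraps around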

def writecases(rlog, tr):
    '''Write some revisions of interest to the test.

    The test is interested in 3 properties of a revision:

    - Is it a delta or a full text? (isdelta)
      This is to catch some delta application issues.
    - Does it have the EXTSTORED flag? (isext)
      This is to catch some flag processor issues, especially when they
      interact with revlog deltas.
    - Is its text empty? (isempty)
      This is less important. It is intended to catch some careless
      checks like "if text" instead of "if text is None". Note: if a flag
      processor is involved, the raw text may not be empty.

    Write 65 revisions, so that all combinations of the above flags for
    adjacent revisions are covered. That is to say,

        len(set(
            (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
            for r in range(len(rlog) - 1)
        )) == 64

    where "r.delta", "r.ext", and "r.empty" are booleans matching the
    properties mentioned above.

    Return expected [(text, rawtext)].
    '''
    result = []
    for i, x in enumerate(genbits(3)):
        isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
        if isempty:
            text = b''
        else:
            text = gentext(i)
        rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)

        # Verify text, rawtext, and rawsize
        if isext:
            rawtext = writeprocessor(None, text)[0]
        else:
            rawtext = text
        if rlog.rawsize(rev) != len(rawtext):
            abort('rev %d: wrong rawsize' % rev)
        if rlog.revision(rev, raw=False) != text:
            abort('rev %d: wrong text' % rev)
        if rlog.revision(rev, raw=True) != rawtext:
            abort('rev %d: wrong rawtext' % rev)
        result.append((text, rawtext))

        # Verify that flags like isdelta and isext work as expected
        if bool(rlog.deltaparent(rev) > -1) != isdelta:
            abort('rev %d: isdelta is ineffective' % rev)
        if bool(rlog.flags(rev)) != isext:
            abort('rev %d: isext is ineffective' % rev)
    return result
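
# Illustration of the bit encoding (added commentary, not part of the original
# test): each value from genbits(3) packs the three properties into its low
# bits. For example,
#
#     x = 0b101  ->  isdelta=True, isext=False, isempty=True
#
# and the pairing property of genbits() guarantees every combination of the
# six booleans (three per revision, for two adjacent revisions) appears
# somewhere among the 2 ** 6 + 1 = 65 revisions written above.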

# Main test and checking

def checkrevlog(rlog, expected):
    '''Check rlog contents against expected, a list of (text, rawtext)'''
    # Test using different access orders. This could expose some issues
    # depending on revlog caching (see revlog._cache).
    for r0 in range(len(rlog) - 1):
        r1 = r0 + 1
        for revorder in [[r0, r1], [r1, r0]]:
            for raworder in [[True], [False], [True, False], [False, True]]:
                # Reopen the revlog under test so each pass starts with a
                # cold cache.
                nlog = newrevlog(name=rlog.indexfile)
                for rev in revorder:
                    for raw in raworder:
                        t = nlog.revision(rev, raw=raw)
                        if t != expected[rev][int(raw)]:
                            abort('rev %d: corrupted %stext'
                                  % (rev, raw and 'raw' or ''))
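
# Note (added commentary): each adjacent pair of revisions is read under
# 2 * 4 = 8 access patterns -- two revision orders times four raw/non-raw
# orders -- so both cold- and warm-cache code paths are exercised.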

def maintest():
    expected = rl = None
    with newtransaction() as tr:
        rl = newrevlog(recreate=True)
        expected = writecases(rl, tr)
        checkrevlog(rl, expected)
        print('local test passed')
        # Copy via revlog.addgroup
        rl1 = addgroupcopy(rl, tr)
        checkrevlog(rl1, expected)
        rl2 = addgroupcopy(rl, tr, optimaldelta=False)
        checkrevlog(rl2, expected)
        print('addgroupcopy test passed')
        # Copy via revlog.clone
        rl3 = newrevlog(name='_destrevlog3.i', recreate=True)
        rl.clone(tr, rl3)
        checkrevlog(rl3, expected)
        print('clone test passed')
        # Copy via the low-level revlog._addrevision
        rl4 = lowlevelcopy(rl, tr)
        checkrevlog(rl4, expected)
        print('lowlevelcopy test passed')

try:
    maintest()
except Exception as ex:
    abort('crashed: %s' % ex)