revlog: pass a transaction object to `rewrite_sidedata`...
marmoute
r47990:2bd4b521 default
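For context, the change below threads the transaction into every sidedata rewrite. A minimal sketch of the new calling convention follows (the wrapper function name is hypothetical; `trp`, `sidedata_helpers`, and the `touched_*` maps are the names used in `apply()` in the diff):

    def _rewrite_all_sidedata(trp, cl, touched_manifests, touched_filelogs,
                              sidedata_helpers, clstart, clend):
        # rewrite_sidedata() now takes the transaction (here the weakref
        # proxy `trp`) as its first argument, so the revlog can record the
        # rewrite within the ongoing transaction.
        cl.rewrite_sidedata(trp, sidedata_helpers, clstart, clend - 1)
        for mf, (startrev, endrev) in touched_manifests.items():
            mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
        for fl, (startrev, endrev) in touched_filelogs.items():
            fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)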
@@ -1,1952 +1,1954 @@
# changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import struct
import weakref

from .i18n import _
from .node import (
    hex,
    nullrev,
    short,
)
from .pycompat import open

from . import (
    error,
    match as matchmod,
    mdiff,
    phases,
    pycompat,
    requirements,
    scmutil,
    util,
)

from .interfaces import repository
from .revlogutils import sidedata as sidedatamod
from .revlogutils import constants as revlog_constants
from .utils import storageutil

_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
_CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")

LFS_REQUIREMENT = b'lfs'

readexactly = util.readexactly


def getchunk(stream):
    """return the next chunk from stream as a string"""
    d = readexactly(stream, 4)
    l = struct.unpack(b">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_(b"invalid chunk length %d") % l)
        return b""
    return readexactly(stream, l - 4)


def chunkheader(length):
    """return a changegroup chunk header (string)"""
    return struct.pack(b">l", length + 4)


def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    return struct.pack(b">l", 0)


def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    return chunkheader(len(path)) + path


def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, b"wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, b"wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
            cleanup = filename
        for c in chunks:
            fh.write(c)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)


class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def deltachunk(self, prevnode):
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        header = self._deltaheader(header, prevnode)
        node, p1, p2, deltabase, cs, flags, protocol_flags = header
        return node, p1, p2, cs, deltabase, delta, flags, protocol_flags

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block in the sshrepo case because it doesn't know where the
        stream ends.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        storage = repo.manifestlog.getstorage(b'')
        storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
        sidedata_categories=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1

        `sidedata_categories` is an optional set of the remote's sidedata wanted
        categories.
        """
        repo = repo.unfiltered()

        # Only useful if we're adding sidedata categories. If both peers have
        # the same categories, then we simply don't do anything.
        adding_sidedata = (
            requirements.REVLOGV2_REQUIREMENT in repo.requirements
            and self.version == b'04'
            and srctype == b'pull'
        )
        if adding_sidedata:
            sidedata_helpers = sidedatamod.get_sidedata_helpers(
                repo,
                sidedata_categories or set(),
                pull=True,
            )
        else:
            sidedata_helpers = None

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if it exists)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            duprevs = []

            def ondupchangelog(cl, rev):
                if rev < clstart:
                    duprevs.append(rev)

            def onchangelog(cl, rev):
                ctx = cl.changelogrevision(rev)
                efilesset.update(ctx.files)
                repo.register_changeset(rev, ctx)

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                alwayscache=True,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # Keep track of the (non-changelog) revlogs we've updated and their
            # range of new revisions for sidedata rewrite.
            # TODO do something more efficient than keeping the reference to
            # the revlogs, especially memory-wise.
            touched_manifests = {}
            touched_filelogs = {}

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            on_manifest_rev = None
            if sidedata_helpers:
                if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:

                    def on_manifest_rev(manifest, rev):
                        range = touched_manifests.get(manifest)
                        if not range:
                            touched_manifests[manifest] = (rev, rev)
                        else:
                            assert rev == range[1] + 1
                            touched_manifests[manifest] = (range[0], rev)

            self._unpackmanifests(
                repo,
                revmap,
                trp,
                progress,
                addrevisioncb=on_manifest_rev,
            )

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            on_filelog_rev = None
            if sidedata_helpers:
                if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:

                    def on_filelog_rev(filelog, rev):
                        range = touched_filelogs.get(filelog)
                        if not range:
                            touched_filelogs[filelog] = (rev, rev)
                        else:
                            assert rev == range[1] + 1
                            touched_filelogs[filelog] = (range[0], rev)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo,
                self,
                revmap,
                trp,
                efiles,
                needfiles,
                addrevisioncb=on_filelog_rev,
            )

            if sidedata_helpers:
                if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
-                    cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
+                    cl.rewrite_sidedata(
+                        trp, sidedata_helpers, clstart, clend - 1
+                    )
                 for mf, (startrev, endrev) in touched_manifests.items():
-                    mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
+                    mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
                 for fl, (startrev, endrev) in touched_filelogs.items():
-                    fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
+                    fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code uses bundle operations for internal purposes. They
            # usually set `ui.quiet` to do this outside of user sight. Since
            # the report of such operations now happens at the end of the
            # transaction, ui.quiet has no direct effect on the output.
            #
            # To preserve this intent we use an inelegant hack: we fail to
            # report the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all changes in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                if duprevs:
                    duprevs.extend(added)
                else:
                    duprevs = added
                phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
                duprevs = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
            yield chunkdata
            chain = chunkdata[0]


class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags


class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs, flags = headertuple
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
        super(cg3unpacker, self)._unpackmanifests(
            repo, revmap, trp, prog, addrevisioncb=addrevisioncb
        )
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(
                deltas, revmap, trp, addrevisioncb=addrevisioncb
            ):
                raise error.Abort(_(b"received dir revlog group is empty"))


class cg4unpacker(cg3unpacker):
    """Unpacker for cg4 streams.

    cg4 streams add support for exchanging sidedata.
    """

    deltaheader = _CHANGEGROUPV4_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'04'

    def _deltaheader(self, headertuple, prevnode):
        protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def deltachunk(self, prevnode):
        res = super(cg4unpacker, self).deltachunk(prevnode)
        if not res:
            return res

        (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res

        sidedata = {}
        if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
            sidedata_raw = getchunk(self._stream)
            sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)

        return node, p1, p2, cs, deltabase, delta, flags, sidedata

class headerlessfixup(object):
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)


def _revisiondeltatochunks(repo, delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == repo.nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data

    if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
        # Need a separate chunk for sidedata to be able to differentiate
        # "raw delta" length and sidedata length
        sidedata = delta.sidedata
        yield chunkheader(len(sidedata))
        yield sidedata


def _sortnodesellipsis(store, nodes, cl, lookup):
    """Sort nodes for changegroup generation."""
    # Ellipses serving mode.
    #
    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    key = lambda n: cl.rev(lookup(n))
    return sorted(nodes, key=key)


def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                if util.safehasattr(store, 'target'):
                    target = store.display_id
                else:
                    # some revlog not actually a revlog
                    target = store._revlog.display_id

                raise error.Abort(
                    b"unable to resolve parent while packing '%s' %r"
                    b' for changeset %r' % (target, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode


def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
    sidedata_helpers=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
        sidedata_helpers=sidedata_helpers,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
967 revision.p1node = p1node
966 revision.p2node = p2node
968 revision.p2node = p2node
967 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
969 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
968
970
969 else:
971 else:
970 linknode = lookup(revision.node)
972 linknode = lookup(revision.node)
971
973
972 revision.linknode = linknode
974 revision.linknode = linknode
973 yield revision
975 yield revision
974
976
975 if progress:
977 if progress:
976 progress.complete()
978 progress.complete()
977
979
978
980
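
# A minimal consumption sketch (illustrative only; ``headerfn`` and ``out``
# are hypothetical stand-ins, not names defined in this module). Callers
# drain the generator and serialize each ``revisiondelta``:
#
#     for rev in deltagroup(repo, repo.changelog, nodes, True, lookup, False):
#         for chunk in _revisiondeltatochunks(repo, rev, headerfn):
#             out.write(chunk)
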
class cgpacker(object):
    def __init__(
        self,
        repo,
        oldmatcher,
        matcher,
        version,
        builddeltaheader,
        manifestsend,
        forcedeltaparentprev=False,
        bundlecaps=None,
        ellipses=False,
        shallow=False,
        ellipsisroots=None,
        fullnodes=None,
        remote_sidedata=None,
    ):
        """Given a source repo, construct a bundler.

        oldmatcher is a matcher that matches on files the client already has.
        These will not be included in the changegroup.

        matcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.

        remote_sidedata is the set of sidedata categories wanted by the remote.
        """
        assert oldmatcher
        assert matcher
        self._oldmatcher = oldmatcher
        self._matcher = matcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        if remote_sidedata is None:
            remote_sidedata = set()
        self._remote_sidedata = remote_sidedata
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.
        If changelog is False, changelog data won't be added to changegroup
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        sidedata_helpers = None
        if self.version == b'04':
            remote_sidedata = self._remote_sidedata
            if source == b'strip':
                # We're our own remote when stripping, get the no-op helpers
                # TODO a better approach would be for the strip bundle to
                # correctly advertise its sidedata categories directly.
                remote_sidedata = repo._wanted_sidedata
            sidedata_helpers = sidedatamod.get_sidedata_helpers(
                repo, remote_sidedata
            )

        clstate, deltas = self._generatechangelog(
            cl,
            clnodes,
            generate=changelog,
            sidedata_helpers=sidedata_helpers,
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(
                self._repo, delta, self._builddeltaheader
            ):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield closechunk()

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
            sidedata_helpers=sidedata_helpers,
        )

        for tree, deltas in it:
            if tree:
                assert self.version in (b'03', b'04')
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(
                    self._repo, delta, self._builddeltaheader
                )
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
            sidedata_helpers=sidedata_helpers,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(
                    self._repo, delta, self._builddeltaheader
                )
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)

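    # For reference, the framing generate() produces is a sequence of
    # length-prefixed chunks (see chunkheader()/closechunk()):
    #
    #     <changelog deltas> <close>
    #     [<tree fileheader>] <manifest deltas> <close> ... <manifestsend>
    #     <file fileheader> <file deltas> <close> ...
    #     <close>
    #
    # where <close> is the zero-length chunk from closechunk() and tree
    # headers appear only for changegroup versions 03/04.
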
    def _generatechangelog(
        self, cl, nodes, generate=True, sidedata_helpers=None
    ):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        If generate is False, the state will be fully populated and no chunk
        stream will be yielded.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
            sidedata_helpers=sidedata_helpers,
        )

        return state, gen

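    # Illustrative shape of the returned state once the chunk stream has
    # been drained (keys and value types derived from the code above):
    #
    #     {
    #         b'clrevorder': {clnode: emission_index, ...},
    #         b'manifests': {manifestnode: introducing_clnode, ...},
    #         b'changedfiles': {fname, ...},
    #         b'clrevtomanifestrev': {clrev: mfrev, ...},  # ellipses mode only
    #     }
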
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
        sidedata_helpers=None,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent based on pulls vs pushes, etc.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        repo = self._repo
        mfl = repo.manifestlog
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree

                # pytype: disable=unsupported-operands
                return manifests.__getitem__
                # pytype: enable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []

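    # Worked example for the worklist above (b'foo/' is a hypothetical
    # directory): in a treemanifest repo, draining the root tree's delta
    # group makes lookupmflinknode() grow tmfnodes from {b'': {...}} to
    # also hold {b'foo/': {node: clnode, ...}}, so a later iteration of
    # the while loop emits a group for b'foo/'.
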
    def _prunemanifests(self, store, nodes, commonrevs):
        if not self._ellipses:
            # In the non-ellipses case, and in large repositories, it is
            # better to avoid calling store.rev and store.linkrev on a lot
            # of nodes than to send some extra data
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
        sidedata_helpers=None,
    ):
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Look up filenodes; we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            yield fname, deltas

        progress.complete()


def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg4packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    # Sidedata is in a separate chunk from the delta to differentiate
    # "raw delta" and sidedata.
    def builddeltaheader(d):
        return _CHANGEGROUPV4_DELTA_HEADER.pack(
            d.protocol_flags,
            d.node,
            d.p1node,
            d.p2node,
            d.basenode,
            d.linknode,
            d.flags,
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'04',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
    # cg4 adds support for exchanging sidedata
    b'04': (_makecg4packer, cg4unpacker),
}

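# Example lookups (purely illustrative): building a version '02' stream goes
# through _packermap[b'02'][0] (i.e. _makecg2packer), while reading one back
# goes through _packermap[b'02'][1] (cg2unpacker), which is exactly what
# getbundler() and getunbundler() below do.
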
def allsupportedversions(repo):
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for a repo
        # to contain both normal and tree manifests at the same time, so using
        # an older version to pull data is viable
        #
        # (or even to push a subset of history)
        needv03 = True
    if not needv03:
        versions.discard(b'03')
    want_v4 = (
        repo.ui.configbool(b'experimental', b'changegroup4')
        or requirements.REVLOGV2_REQUIREMENT in repo.requirements
    )
    if not want_v4:
        versions.discard(b'04')
    return versions


# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    return allsupportedversions(repo)


# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions


def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))


def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)

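# For instance, in a repository with the generaldelta requirement and flat
# manifests, supportedoutgoingversions() typically contains b'01' and b'02';
# safeversion() then discards b'01' and returns b'02'.
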
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


def getunbundler(version, fh, alg, extras=None):
    return _packermap[version][1](fh, alg, extras=extras)

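# A minimal round-trip sketch (illustrative; ``fh`` is a hypothetical file
# object and b'UN' names the uncompressed compression engine):
#
#     packer = getbundler(b'02', repo)
#     stream = packer.generate(commonrevs, clnodes, False, b'push')
#     cg = getunbundler(b'02', fh, b'UN')
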
def _changegroupinfo(repo, nodes, source):
    if repo.ui.verbose or source == b'bundle':
        repo.ui.status(_(b"%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug(b"list of changesets:\n")
        for node in nodes:
            repo.ui.debug(b"%s\n" % hex(node))


def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    return getunbundler(
        version,
        util.chunkbuffer(cgstream),
        None,
        {b'clcount': len(outgoing.missing)},
    )


def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
    remote_sidedata=None,
):
    bundler = getbundler(
        version,
        repo,
        bundlecaps=bundlecaps,
        matcher=matcher,
        remote_sidedata=remote_sidedata,
    )

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.ancestorsof
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)

1895 def _addchangegroupfiles(
1897 def _addchangegroupfiles(
1896 repo,
1898 repo,
1897 source,
1899 source,
1898 revmap,
1900 revmap,
1899 trp,
1901 trp,
1900 expectedfiles,
1902 expectedfiles,
1901 needfiles,
1903 needfiles,
1902 addrevisioncb=None,
1904 addrevisioncb=None,
1903 ):
1905 ):
1904 revisions = 0
1906 revisions = 0
1905 files = 0
1907 files = 0
1906 progress = repo.ui.makeprogress(
1908 progress = repo.ui.makeprogress(
1907 _(b'files'), unit=_(b'files'), total=expectedfiles
1909 _(b'files'), unit=_(b'files'), total=expectedfiles
1908 )
1910 )
1909 for chunkdata in iter(source.filelogheader, {}):
1911 for chunkdata in iter(source.filelogheader, {}):
1910 files += 1
1912 files += 1
1911 f = chunkdata[b"filename"]
1913 f = chunkdata[b"filename"]
1912 repo.ui.debug(b"adding %s revisions\n" % f)
1914 repo.ui.debug(b"adding %s revisions\n" % f)
1913 progress.increment()
1915 progress.increment()
1914 fl = repo.file(f)
1916 fl = repo.file(f)
1915 o = len(fl)
1917 o = len(fl)
1916 try:
1918 try:
1917 deltas = source.deltaiter()
1919 deltas = source.deltaiter()
1918 added = fl.addgroup(
1920 added = fl.addgroup(
1919 deltas,
1921 deltas,
1920 revmap,
1922 revmap,
1921 trp,
1923 trp,
1922 addrevisioncb=addrevisioncb,
1924 addrevisioncb=addrevisioncb,
1923 )
1925 )
1924 if not added:
1926 if not added:
1925 raise error.Abort(_(b"received file revlog group is empty"))
1927 raise error.Abort(_(b"received file revlog group is empty"))
1926 except error.CensoredBaseError as e:
1928 except error.CensoredBaseError as e:
1927 raise error.Abort(_(b"received delta base is censored: %s") % e)
1929 raise error.Abort(_(b"received delta base is censored: %s") % e)
1928 revisions += len(fl) - o
1930 revisions += len(fl) - o
1929 if f in needfiles:
1931 if f in needfiles:
1930 needs = needfiles[f]
1932 needs = needfiles[f]
1931 for new in pycompat.xrange(o, len(fl)):
1933 for new in pycompat.xrange(o, len(fl)):
1932 n = fl.node(new)
1934 n = fl.node(new)
1933 if n in needs:
1935 if n in needs:
1934 needs.remove(n)
1936 needs.remove(n)
1935 else:
1937 else:
1936 raise error.Abort(_(b"received spurious file revlog entry"))
1938 raise error.Abort(_(b"received spurious file revlog entry"))
1937 if not needs:
1939 if not needs:
1938 del needfiles[f]
1940 del needfiles[f]
1939 progress.complete()
1941 progress.complete()
1940
1942
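    # Editorial note: everything the incoming manifests promised must now be
    # present in the filelogs; this trailing pass re-checks that and aborts
    # (pointing the user at `hg verify`) if any expected node is missing.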
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
@@ -1,3192 +1,3192 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
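# Editorial note: ellipsis revisions keep their text unchanged on read and
# write, but an ellipsis node cannot be re-verified against its (stripped)
# parents, so each processor above reports False for hash validation.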


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
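# Editorial note (added for exposition): the first field of an index entry
# packs the data offset and the 16-bit revision flags into one integer, so
# for any valid inputs
#     offset_type(off, flags) >> 16 == off
#     offset_type(off, flags) & 0xFFFF == flags
# start() and flags() on the revlog class below perform exactly this
# unpacking.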


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 self._indexfile = None
318 self._indexfile = None
319 self._datafile = None
319 self._datafile = None
320 self._nodemap_file = None
320 self._nodemap_file = None
321 self.postfix = postfix
321 self.postfix = postfix
322 self.opener = opener
322 self.opener = opener
323 if persistentnodemap:
323 if persistentnodemap:
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325
325
326 assert target[0] in ALL_KINDS
326 assert target[0] in ALL_KINDS
327 assert len(target) == 2
327 assert len(target) == 2
328 self.target = target
328 self.target = target
329 # When True, indexfile is opened with checkambig=True at writing, to
329 # When True, indexfile is opened with checkambig=True at writing, to
330 # avoid file stat ambiguity.
330 # avoid file stat ambiguity.
331 self._checkambig = checkambig
331 self._checkambig = checkambig
332 self._mmaplargeindex = mmaplargeindex
332 self._mmaplargeindex = mmaplargeindex
333 self._censorable = censorable
333 self._censorable = censorable
334 # 3-tuple of (node, rev, text) for a raw revision.
334 # 3-tuple of (node, rev, text) for a raw revision.
335 self._revisioncache = None
335 self._revisioncache = None
336 # Maps rev to chain base rev.
336 # Maps rev to chain base rev.
337 self._chainbasecache = util.lrucachedict(100)
337 self._chainbasecache = util.lrucachedict(100)
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 self._chunkcache = (0, b'')
339 self._chunkcache = (0, b'')
340 # How much data to read and cache into the raw revlog data cache.
340 # How much data to read and cache into the raw revlog data cache.
341 self._chunkcachesize = 65536
341 self._chunkcachesize = 65536
342 self._maxchainlen = None
342 self._maxchainlen = None
343 self._deltabothparents = True
343 self._deltabothparents = True
344 self.index = None
344 self.index = None
345 self._nodemap_docket = None
345 self._nodemap_docket = None
346 # Mapping of partial identifiers to full nodes.
346 # Mapping of partial identifiers to full nodes.
347 self._pcache = {}
347 self._pcache = {}
348 # Mapping of revision integer to full node.
348 # Mapping of revision integer to full node.
349 self._compengine = b'zlib'
349 self._compengine = b'zlib'
350 self._compengineopts = {}
350 self._compengineopts = {}
351 self._maxdeltachainspan = -1
351 self._maxdeltachainspan = -1
352 self._withsparseread = False
352 self._withsparseread = False
353 self._sparserevlog = False
353 self._sparserevlog = False
354 self._srdensitythreshold = 0.50
354 self._srdensitythreshold = 0.50
355 self._srmingapsize = 262144
355 self._srmingapsize = 262144
356
356
357 # Make copy of flag processors so each revlog instance can support
357 # Make copy of flag processors so each revlog instance can support
358 # custom flags.
358 # custom flags.
359 self._flagprocessors = dict(flagutil.flagprocessors)
359 self._flagprocessors = dict(flagutil.flagprocessors)
360
360
361 # 2-tuple of file handles being used for active writing.
361 # 2-tuple of file handles being used for active writing.
362 self._writinghandles = None
362 self._writinghandles = None
363 # prevent nesting of addgroup
363 # prevent nesting of addgroup
364 self._adding_group = None
364 self._adding_group = None
365
365
366 self._loadindex()
366 self._loadindex()
367
367
368 self._concurrencychecker = concurrencychecker
368 self._concurrencychecker = concurrencychecker
369
369
370 def _init_opts(self):
370 def _init_opts(self):
371 """process options (from above/config) to setup associated default revlog mode
371 """process options (from above/config) to setup associated default revlog mode
372
372
373 These values might be affected when actually reading on disk information.
373 These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        return util.buffer(util.mmapread(fp))
                return fp.read()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is None:
            entry_point = b'%s.i' % self.radix
        else:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        if self._format_version == REVLOGV0:
            if self._format_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = False
            self._generaldelta = False

        elif self._format_version == REVLOGV1:
            if self._format_flags & ~REVLOGV1_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = self._format_flags & FLAG_INLINE_DATA
            self._generaldelta = self._format_flags & FLAG_GENERALDELTA

        elif self._format_version == REVLOGV2:
            if self._format_flags & ~REVLOGV2_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)

        index_data = entry_data
        self._indexfile = entry_point

        if self.postfix is None or self.postfix == b'a':
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
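        # Editorial summary (added for exposition): the first four bytes of
        # the index double as the revlog header; the low 16 bits select the
        # format version (v0/v1/v2) and the high bits carry feature flags
        # such as FLAG_INLINE_DATA and FLAG_GENERALDELTA, which is why the
        # header is split with `& 0xFFFF` / `& ~0xFFFF` above.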

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
606 """The public facing "ID" of the revlog that we use in message"""
606 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self._indexfile, **args)
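        # Editorial note: any write mode sets checkambig to defeat stat
        # ambiguity (same size and mtime within one second), and plain b'w'
        # mode additionally writes through an atomic temp file, matching the
        # behaviour promised in the class docstring.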

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
671 """iterate over all rev in this revlog (from start to stop)"""
671 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
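    # Editorial note: when p1 is null but p2 is not, parentrevs() returns the
    # pair swapped so the non-null parent always comes first; parents() below
    # applies the same normalization at the node level.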

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
916
916
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
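        # (the extra slot absorbs writes for a nullrev (-1) parent, which
        # would otherwise clobber the head flag of the last revision)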
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
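            # a rev with no stored parents is a root, i.e. a child of the
            # null revision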
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
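        # revision numbers are topologically ordered: an ancestor always
        # has a smaller revision number than its descendants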
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
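            # (min() over the binary node ids yields the lexicographically
            # smallest node)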
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
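            # negative numbers count back from the end, as with Python
            # sequence indexing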
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
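        # cachesize is validated to be a power of two when the revlog is
        # opened, so these masks round the window to cache-aligned boundaries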
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
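        # entry[0] packs the data offset in its upper bits and the revision
        # flags in its lower 16 bits, hence the shift below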
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
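        # a revision stored as a full text has itself recorded as its base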
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
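        # a delta against a parent is a regular delta, not a snapshot; any
        # other delta base makes rev an intermediate snapshot exactly when
        # that base is itself a snapshot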
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future to make the code more
        efficient/lazy.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

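    # Worked example for the inline adjustment above (hypothetical numbers,
    # not from the original file): with 64-byte index entries, sidedata
    # recorded at offset 100 for rev 2 really starts at
    #
    #   100 + 64 * (1 + 2) = 292
    #
    # in the .i file, because the index entries for revs 0..2 are
    # interleaved with the data before it.
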
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

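    # Sketch of the default scheme (shown for illustration; the canonical
    # implementation lives in storageutil.hashrevisionsha1): the node is the
    # SHA-1 of the two parent nodes, in sorted order, followed by the text.
    #
    #   import hashlib
    #   def _node_sketch(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
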
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp(b'r') as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.opener(self._indexfile, mode=b'w', atomictemp=True) as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self._indexfp(b"a+")
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

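    # Worked example (hypothetical numbers; the real threshold is the
    # module-level _maxinline): with three 60,000-byte revisions stored
    # inline, total_size = start(2) + length(2) = 180,000, which exceeds a
    # 131,072-byte threshold, so the interleaved .i content is split into a
    # fresh .d file and a rewritten, non-inline index.
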
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                dfh = self._datafp(b"a+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self._indexfp(b"a+")
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

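    # Summary of the header convention shared by compress()/decompress()
    # above (illustrative, derived from the code itself):
    #
    #   b'\0' + ...  -> stored raw; data that already starts with NUL needs
    #                   no marker byte
    #   b'u' + ...   -> stored uncompressed with an explicit marker; strip
    #                   the first byte
    #   b'x' + ...   -> zlib stream (zlib output begins with 0x78, i.e. 'x')
    #   other        -> first byte(s) select a registered compression engine
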
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

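    # Worked example for the v2 scan above (hypothetical numbers): if rev 2
    # ends at offset 120 but rev 1's sidedata was later rewritten at offset
    # 130 with size 20, then
    #
    #   offset = max(end(2), 130 + 20) = 150
    #
    # so the next write lands after the rewritten sidedata instead of
    # clobbering it.
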
    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
        self._enforceinlinesize(transaction)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

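    # Sketch of the shape addgroup() consumes (derived from the unpacking
    # above, shown for orientation): ``deltas`` yields 8-tuples
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where ``deltabase`` must already exist in this revlog (or be a
    # previously received node) and ``delta`` is a binary patch against it.
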
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

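    # Worked example for the truncation points above (hypothetical sizes):
    # stripping at rev 3 of a split revlog truncates the .d file at start(3)
    # and the .i file at 3 * entry_size; for an inline revlog everything
    # lives in the .i file, so the single truncation point is
    # start(3) + 3 * entry_size.
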
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

2670 def emitrevisions(
2670 def emitrevisions(
2671 self,
2671 self,
2672 nodes,
2672 nodes,
2673 nodesorder=None,
2673 nodesorder=None,
2674 revisiondata=False,
2674 revisiondata=False,
2675 assumehaveparentrevisions=False,
2675 assumehaveparentrevisions=False,
2676 deltamode=repository.CG_DELTAMODE_STD,
2676 deltamode=repository.CG_DELTAMODE_STD,
2677 sidedata_helpers=None,
2677 sidedata_helpers=None,
2678 ):
2678 ):
2679 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2679 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2680 raise error.ProgrammingError(
2680 raise error.ProgrammingError(
2681 b'unhandled value for nodesorder: %s' % nodesorder
2681 b'unhandled value for nodesorder: %s' % nodesorder
2682 )
2682 )
2683
2683
2684 if nodesorder is None and not self._generaldelta:
2684 if nodesorder is None and not self._generaldelta:
2685 nodesorder = b'storage'
2685 nodesorder = b'storage'
2686
2686
2687 if (
2687 if (
2688 not self._storedeltachains
2688 not self._storedeltachains
2689 and deltamode != repository.CG_DELTAMODE_PREV
2689 and deltamode != repository.CG_DELTAMODE_PREV
2690 ):
2690 ):
2691 deltamode = repository.CG_DELTAMODE_FULL
2691 deltamode = repository.CG_DELTAMODE_FULL
2692
2692
2693 return storageutil.emitrevisions(
2693 return storageutil.emitrevisions(
2694 self,
2694 self,
2695 nodes,
2695 nodes,
2696 nodesorder,
2696 nodesorder,
2697 revlogrevisiondelta,
2697 revlogrevisiondelta,
2698 deltaparentfn=self.deltaparent,
2698 deltaparentfn=self.deltaparent,
2699 candeltafn=self.candelta,
2699 candeltafn=self.candelta,
2700 rawsizefn=self.rawsize,
2700 rawsizefn=self.rawsize,
2701 revdifffn=self.revdiff,
2701 revdifffn=self.revdiff,
2702 flagsfn=self.flags,
2702 flagsfn=self.flags,
2703 deltamode=deltamode,
2703 deltamode=deltamode,
2704 revisiondata=revisiondata,
2704 revisiondata=revisiondata,
2705 assumehaveparentrevisions=assumehaveparentrevisions,
2705 assumehaveparentrevisions=assumehaveparentrevisions,
2706 sidedata_helpers=sidedata_helpers,
2706 sidedata_helpers=sidedata_helpers,
2707 )
2707 )
2708
2708
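# Editor's sketch: hedged consumption of emitrevisions(); `rl`, `nodes`
# and `consume` are assumed names. Each emitted object is a
# revlogrevisiondelta whose `delta` (against `basenode`) or full
# `revision` text is populated depending on the delta mode.
for rev_delta in rl.emitrevisions(nodes, nodesorder=b'storage', revisiondata=True):
    consume(rev_delta.node, rev_delta.basenode, rev_delta.delta, rev_delta.revision)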
2709 DELTAREUSEALWAYS = b'always'
2709 DELTAREUSEALWAYS = b'always'
2710 DELTAREUSESAMEREVS = b'samerevs'
2710 DELTAREUSESAMEREVS = b'samerevs'
2711 DELTAREUSENEVER = b'never'
2711 DELTAREUSENEVER = b'never'
2712
2712
2713 DELTAREUSEFULLADD = b'fulladd'
2713 DELTAREUSEFULLADD = b'fulladd'
2714
2714
2715 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2715 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2716
2716
2717 def clone(
2717 def clone(
2718 self,
2718 self,
2719 tr,
2719 tr,
2720 destrevlog,
2720 destrevlog,
2721 addrevisioncb=None,
2721 addrevisioncb=None,
2722 deltareuse=DELTAREUSESAMEREVS,
2722 deltareuse=DELTAREUSESAMEREVS,
2723 forcedeltabothparents=None,
2723 forcedeltabothparents=None,
2724 sidedata_helpers=None,
2724 sidedata_helpers=None,
2725 ):
2725 ):
2726 """Copy this revlog to another, possibly with format changes.
2726 """Copy this revlog to another, possibly with format changes.
2727
2727
2728 The destination revlog will contain the same revisions and nodes.
2728 The destination revlog will contain the same revisions and nodes.
2729 However, it may not be bit-for-bit identical due to e.g. delta encoding
2729 However, it may not be bit-for-bit identical due to e.g. delta encoding
2730 differences.
2730 differences.
2731
2731
2732 The ``deltareuse`` argument controls how deltas from the existing revlog
2732 The ``deltareuse`` argument controls how deltas from the existing revlog
2733 are preserved in the destination revlog. The argument can have the
2733 are preserved in the destination revlog. The argument can have the
2734 following values:
2734 following values:
2735
2735
2736 DELTAREUSEALWAYS
2736 DELTAREUSEALWAYS
2737 Deltas will always be reused (if possible), even if the destination
2737 Deltas will always be reused (if possible), even if the destination
2738 revlog would not select the same revisions for the delta. This is the
2738 revlog would not select the same revisions for the delta. This is the
2739 fastest mode of operation.
2739 fastest mode of operation.
2740 DELTAREUSESAMEREVS
2740 DELTAREUSESAMEREVS
2741 Deltas will be reused if the destination revlog would pick the same
2741 Deltas will be reused if the destination revlog would pick the same
2742 revisions for the delta. This mode strikes a balance between speed
2742 revisions for the delta. This mode strikes a balance between speed
2743 and optimization.
2743 and optimization.
2744 DELTAREUSENEVER
2744 DELTAREUSENEVER
2745 Deltas will never be reused. This is the slowest mode of execution.
2745 Deltas will never be reused. This is the slowest mode of execution.
2746 This mode can be used to recompute deltas (e.g. if the diff/delta
2746 This mode can be used to recompute deltas (e.g. if the diff/delta
2747 algorithm changes).
2747 algorithm changes).
2748 DELTAREUSEFULLADD
2748 DELTAREUSEFULLADD
2749 Revisions will be re-added as if they were new content. This is
2749 Revisions will be re-added as if they were new content. This is
2750 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2750 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2751 e.g. large file detection and handling.
2751 e.g. large file detection and handling.
2752
2752
2753 Delta computation can be slow, so the choice of delta reuse policy can
2753 Delta computation can be slow, so the choice of delta reuse policy can
2754 significantly affect run time.
2754 significantly affect run time.
2755
2755
2756 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2756 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2757 two extremes. Deltas will be reused if they are appropriate. But if the
2757 two extremes. Deltas will be reused if they are appropriate. But if the
2758 delta could choose a better revision, it will do so. This means if you
2758 delta could choose a better revision, it will do so. This means if you
2759 are converting a non-generaldelta revlog to a generaldelta revlog,
2759 are converting a non-generaldelta revlog to a generaldelta revlog,
2760 deltas will be recomputed if the delta's parent isn't a parent of the
2760 deltas will be recomputed if the delta's parent isn't a parent of the
2761 revision.
2761 revision.
2762
2762
2763 In addition to the delta policy, the ``forcedeltabothparents``
2763 In addition to the delta policy, the ``forcedeltabothparents``
2764 argument controls whether to force computing deltas against both parents
2764 argument controls whether to force computing deltas against both parents
2765 for merges. If unset, the destination revlog's current setting is kept.
2765 for merges. If unset, the destination revlog's current setting is kept.
2766
2766
2767 See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
2767 See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
2768 `sidedata_helpers`.
2768 `sidedata_helpers`.
2769 """
2769 """
2770 if deltareuse not in self.DELTAREUSEALL:
2770 if deltareuse not in self.DELTAREUSEALL:
2771 raise ValueError(
2771 raise ValueError(
2772 _(b'value for deltareuse invalid: %s') % deltareuse
2772 _(b'value for deltareuse invalid: %s') % deltareuse
2773 )
2773 )
2774
2774
2775 if len(destrevlog):
2775 if len(destrevlog):
2776 raise ValueError(_(b'destination revlog is not empty'))
2776 raise ValueError(_(b'destination revlog is not empty'))
2777
2777
2778 if getattr(self, 'filteredrevs', None):
2778 if getattr(self, 'filteredrevs', None):
2779 raise ValueError(_(b'source revlog has filtered revisions'))
2779 raise ValueError(_(b'source revlog has filtered revisions'))
2780 if getattr(destrevlog, 'filteredrevs', None):
2780 if getattr(destrevlog, 'filteredrevs', None):
2781 raise ValueError(_(b'destination revlog has filtered revisions'))
2781 raise ValueError(_(b'destination revlog has filtered revisions'))
2782
2782
2783 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2783 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2784 # if possible.
2784 # if possible.
2785 oldlazydelta = destrevlog._lazydelta
2785 oldlazydelta = destrevlog._lazydelta
2786 oldlazydeltabase = destrevlog._lazydeltabase
2786 oldlazydeltabase = destrevlog._lazydeltabase
2787 oldamd = destrevlog._deltabothparents
2787 oldamd = destrevlog._deltabothparents
2788
2788
2789 try:
2789 try:
2790 if deltareuse == self.DELTAREUSEALWAYS:
2790 if deltareuse == self.DELTAREUSEALWAYS:
2791 destrevlog._lazydeltabase = True
2791 destrevlog._lazydeltabase = True
2792 destrevlog._lazydelta = True
2792 destrevlog._lazydelta = True
2793 elif deltareuse == self.DELTAREUSESAMEREVS:
2793 elif deltareuse == self.DELTAREUSESAMEREVS:
2794 destrevlog._lazydeltabase = False
2794 destrevlog._lazydeltabase = False
2795 destrevlog._lazydelta = True
2795 destrevlog._lazydelta = True
2796 elif deltareuse == self.DELTAREUSENEVER:
2796 elif deltareuse == self.DELTAREUSENEVER:
2797 destrevlog._lazydeltabase = False
2797 destrevlog._lazydeltabase = False
2798 destrevlog._lazydelta = False
2798 destrevlog._lazydelta = False
2799
2799
2800 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2800 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2801
2801
2802 self._clone(
2802 self._clone(
2803 tr,
2803 tr,
2804 destrevlog,
2804 destrevlog,
2805 addrevisioncb,
2805 addrevisioncb,
2806 deltareuse,
2806 deltareuse,
2807 forcedeltabothparents,
2807 forcedeltabothparents,
2808 sidedata_helpers,
2808 sidedata_helpers,
2809 )
2809 )
2810
2810
2811 finally:
2811 finally:
2812 destrevlog._lazydelta = oldlazydelta
2812 destrevlog._lazydelta = oldlazydelta
2813 destrevlog._lazydeltabase = oldlazydeltabase
2813 destrevlog._lazydeltabase = oldlazydeltabase
2814 destrevlog._deltabothparents = oldamd
2814 destrevlog._deltabothparents = oldamd
2815
2815
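# Editor's sketch: hedged usage of clone() with the slow-but-thorough
# policy documented above. `src`, `dst` (an empty destination revlog)
# and `tr` (an open transaction) are assumed to exist.
src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)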
2816 def _clone(
2816 def _clone(
2817 self,
2817 self,
2818 tr,
2818 tr,
2819 destrevlog,
2819 destrevlog,
2820 addrevisioncb,
2820 addrevisioncb,
2821 deltareuse,
2821 deltareuse,
2822 forcedeltabothparents,
2822 forcedeltabothparents,
2823 sidedata_helpers,
2823 sidedata_helpers,
2824 ):
2824 ):
2825 """perform the core duty of `revlog.clone` after parameter processing"""
2825 """perform the core duty of `revlog.clone` after parameter processing"""
2826 deltacomputer = deltautil.deltacomputer(destrevlog)
2826 deltacomputer = deltautil.deltacomputer(destrevlog)
2827 index = self.index
2827 index = self.index
2828 for rev in self:
2828 for rev in self:
2829 entry = index[rev]
2829 entry = index[rev]
2830
2830
2831 # Some classes override linkrev to take filtered revs into
2831 # Some classes override linkrev to take filtered revs into
2832 # account. Use raw entry from index.
2832 # account. Use raw entry from index.
2833 flags = entry[0] & 0xFFFF
2833 flags = entry[0] & 0xFFFF
2834 linkrev = entry[4]
2834 linkrev = entry[4]
2835 p1 = index[entry[5]][7]
2835 p1 = index[entry[5]][7]
2836 p2 = index[entry[6]][7]
2836 p2 = index[entry[6]][7]
2837 node = entry[7]
2837 node = entry[7]
2838
2838
2839 # (Possibly) reuse the delta from the revlog if allowed and
2839 # (Possibly) reuse the delta from the revlog if allowed and
2840 # the revlog chunk is a delta.
2840 # the revlog chunk is a delta.
2841 cachedelta = None
2841 cachedelta = None
2842 rawtext = None
2842 rawtext = None
2843 if deltareuse == self.DELTAREUSEFULLADD:
2843 if deltareuse == self.DELTAREUSEFULLADD:
2844 text, sidedata = self._revisiondata(rev)
2844 text, sidedata = self._revisiondata(rev)
2845
2845
2846 if sidedata_helpers is not None:
2846 if sidedata_helpers is not None:
2847 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2847 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2848 self, sidedata_helpers, sidedata, rev
2848 self, sidedata_helpers, sidedata, rev
2849 )
2849 )
2850 flags = flags | new_flags[0] & ~new_flags[1]
2850 flags = flags | new_flags[0] & ~new_flags[1]
2851
2851
2852 destrevlog.addrevision(
2852 destrevlog.addrevision(
2853 text,
2853 text,
2854 tr,
2854 tr,
2855 linkrev,
2855 linkrev,
2856 p1,
2856 p1,
2857 p2,
2857 p2,
2858 cachedelta=cachedelta,
2858 cachedelta=cachedelta,
2859 node=node,
2859 node=node,
2860 flags=flags,
2860 flags=flags,
2861 deltacomputer=deltacomputer,
2861 deltacomputer=deltacomputer,
2862 sidedata=sidedata,
2862 sidedata=sidedata,
2863 )
2863 )
2864 else:
2864 else:
2865 if destrevlog._lazydelta:
2865 if destrevlog._lazydelta:
2866 dp = self.deltaparent(rev)
2866 dp = self.deltaparent(rev)
2867 if dp != nullrev:
2867 if dp != nullrev:
2868 cachedelta = (dp, bytes(self._chunk(rev)))
2868 cachedelta = (dp, bytes(self._chunk(rev)))
2869
2869
2870 sidedata = None
2870 sidedata = None
2871 if not cachedelta:
2871 if not cachedelta:
2872 rawtext, sidedata = self._revisiondata(rev)
2872 rawtext, sidedata = self._revisiondata(rev)
2873 if sidedata is None:
2873 if sidedata is None:
2874 sidedata = self.sidedata(rev)
2874 sidedata = self.sidedata(rev)
2875
2875
2876 if sidedata_helpers is not None:
2876 if sidedata_helpers is not None:
2877 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2877 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2878 self, sidedata_helpers, sidedata, rev
2878 self, sidedata_helpers, sidedata, rev
2879 )
2879 )
2880 flags = flags | new_flags[0] & ~new_flags[1]
2880 flags = flags | new_flags[0] & ~new_flags[1]
2881
2881
2882 with destrevlog._writing(tr):
2882 with destrevlog._writing(tr):
2883 destrevlog._addrevision(
2883 destrevlog._addrevision(
2884 node,
2884 node,
2885 rawtext,
2885 rawtext,
2886 tr,
2886 tr,
2887 linkrev,
2887 linkrev,
2888 p1,
2888 p1,
2889 p2,
2889 p2,
2890 flags,
2890 flags,
2891 cachedelta,
2891 cachedelta,
2892 deltacomputer=deltacomputer,
2892 deltacomputer=deltacomputer,
2893 sidedata=sidedata,
2893 sidedata=sidedata,
2894 )
2894 )
2895
2895
2896 if addrevisioncb:
2896 if addrevisioncb:
2897 addrevisioncb(self, rev, node)
2897 addrevisioncb(self, rev, node)
2898
2898
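# Editor's worked example (illustrative values) of the flag-merging
# expression used twice above: sidedata helpers return a pair of
# bitmasks, (flags to add, flags to remove).
flags = 0b0101
new_flags = (0b0011, 0b0001)  # (to_add, to_remove)
# `&` binds tighter than `|`, so the removal mask filters the bits being
# added; it does not clear bits already present in `flags`.
merged = flags | new_flags[0] & ~new_flags[1]
assert merged == 0b0111  # bit 1 added; bit 0 of to_add filtered out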
2899 def censorrevision(self, tr, censornode, tombstone=b''):
2899 def censorrevision(self, tr, censornode, tombstone=b''):
2900 if self._format_version == REVLOGV0:
2900 if self._format_version == REVLOGV0:
2901 raise error.RevlogError(
2901 raise error.RevlogError(
2902 _(b'cannot censor with version %d revlogs')
2902 _(b'cannot censor with version %d revlogs')
2903 % self._format_version
2903 % self._format_version
2904 )
2904 )
2905
2905
2906 censorrev = self.rev(censornode)
2906 censorrev = self.rev(censornode)
2907 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2907 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2908
2908
2909 if len(tombstone) > self.rawsize(censorrev):
2909 if len(tombstone) > self.rawsize(censorrev):
2910 raise error.Abort(
2910 raise error.Abort(
2911 _(b'censor tombstone must be no longer than censored data')
2911 _(b'censor tombstone must be no longer than censored data')
2912 )
2912 )
2913
2913
2914 # Rewriting the revlog in place is hard. Our strategy for censoring is
2914 # Rewriting the revlog in place is hard. Our strategy for censoring is
2915 # to create a new revlog, copy all revisions to it, then replace the
2915 # to create a new revlog, copy all revisions to it, then replace the
2916 # revlogs on transaction close.
2916 # revlogs on transaction close.
2917 #
2917 #
2918 # This is a bit dangerous. We could easily have a mismatch of state.
2918 # This is a bit dangerous. We could easily have a mismatch of state.
2919 newrl = revlog(
2919 newrl = revlog(
2920 self.opener,
2920 self.opener,
2921 target=self.target,
2921 target=self.target,
2922 radix=self.radix,
2922 radix=self.radix,
2923 postfix=b'tmpcensored',
2923 postfix=b'tmpcensored',
2924 censorable=True,
2924 censorable=True,
2925 )
2925 )
2926 newrl._format_version = self._format_version
2926 newrl._format_version = self._format_version
2927 newrl._format_flags = self._format_flags
2927 newrl._format_flags = self._format_flags
2928 newrl._generaldelta = self._generaldelta
2928 newrl._generaldelta = self._generaldelta
2929 newrl._parse_index = self._parse_index
2929 newrl._parse_index = self._parse_index
2930
2930
2931 for rev in self.revs():
2931 for rev in self.revs():
2932 node = self.node(rev)
2932 node = self.node(rev)
2933 p1, p2 = self.parents(node)
2933 p1, p2 = self.parents(node)
2934
2934
2935 if rev == censorrev:
2935 if rev == censorrev:
2936 newrl.addrawrevision(
2936 newrl.addrawrevision(
2937 tombstone,
2937 tombstone,
2938 tr,
2938 tr,
2939 self.linkrev(censorrev),
2939 self.linkrev(censorrev),
2940 p1,
2940 p1,
2941 p2,
2941 p2,
2942 censornode,
2942 censornode,
2943 REVIDX_ISCENSORED,
2943 REVIDX_ISCENSORED,
2944 )
2944 )
2945
2945
2946 if newrl.deltaparent(rev) != nullrev:
2946 if newrl.deltaparent(rev) != nullrev:
2947 raise error.Abort(
2947 raise error.Abort(
2948 _(
2948 _(
2949 b'censored revision stored as delta; '
2949 b'censored revision stored as delta; '
2950 b'cannot censor'
2950 b'cannot censor'
2951 ),
2951 ),
2952 hint=_(
2952 hint=_(
2953 b'censoring of revlogs is not '
2953 b'censoring of revlogs is not '
2954 b'fully implemented; please report '
2954 b'fully implemented; please report '
2955 b'this bug'
2955 b'this bug'
2956 ),
2956 ),
2957 )
2957 )
2958 continue
2958 continue
2959
2959
2960 if self.iscensored(rev):
2960 if self.iscensored(rev):
2961 if self.deltaparent(rev) != nullrev:
2961 if self.deltaparent(rev) != nullrev:
2962 raise error.Abort(
2962 raise error.Abort(
2963 _(
2963 _(
2964 b'cannot censor due to censored '
2964 b'cannot censor due to censored '
2965 b'revision having delta stored'
2965 b'revision having delta stored'
2966 )
2966 )
2967 )
2967 )
2968 rawtext = self._chunk(rev)
2968 rawtext = self._chunk(rev)
2969 else:
2969 else:
2970 rawtext = self.rawdata(rev)
2970 rawtext = self.rawdata(rev)
2971
2971
2972 newrl.addrawrevision(
2972 newrl.addrawrevision(
2973 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2973 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2974 )
2974 )
2975
2975
2976 tr.addbackup(self._indexfile, location=b'store')
2976 tr.addbackup(self._indexfile, location=b'store')
2977 if not self._inline:
2977 if not self._inline:
2978 tr.addbackup(self._datafile, location=b'store')
2978 tr.addbackup(self._datafile, location=b'store')
2979
2979
2980 self.opener.rename(newrl._indexfile, self._indexfile)
2980 self.opener.rename(newrl._indexfile, self._indexfile)
2981 if not self._inline:
2981 if not self._inline:
2982 self.opener.rename(newrl._datafile, self._datafile)
2982 self.opener.rename(newrl._datafile, self._datafile)
2983
2983
2984 self.clearcaches()
2984 self.clearcaches()
2985 self._loadindex()
2985 self._loadindex()
2986
2986
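# Editor's sketch: a hedged call site for censorrevision(). The names
# `repo` and `node` are assumptions; `repo.file()` returns a filelog
# whose storage ultimately reaches this method.
with repo.lock(), repo.transaction(b'censor') as tr:
    fl = repo.file(b'path/to/secret')
    fl.censorrevision(tr, node, tombstone=b'censored by administrator')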
2987 def verifyintegrity(self, state):
2987 def verifyintegrity(self, state):
2988 """Verifies the integrity of the revlog.
2988 """Verifies the integrity of the revlog.
2989
2989
2990 Yields ``revlogproblem`` instances describing problems that are
2990 Yields ``revlogproblem`` instances describing problems that are
2991 found.
2991 found.
2992 """
2992 """
2993 dd, di = self.checksize()
2993 dd, di = self.checksize()
2994 if dd:
2994 if dd:
2995 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2995 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2996 if di:
2996 if di:
2997 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2997 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2998
2998
2999 version = self._format_version
2999 version = self._format_version
3000
3000
3001 # The verifier tells us what version revlog we should be.
3001 # The verifier tells us what version revlog we should be.
3002 if version != state[b'expectedversion']:
3002 if version != state[b'expectedversion']:
3003 yield revlogproblem(
3003 yield revlogproblem(
3004 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3004 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3005 % (self.display_id, version, state[b'expectedversion'])
3005 % (self.display_id, version, state[b'expectedversion'])
3006 )
3006 )
3007
3007
3008 state[b'skipread'] = set()
3008 state[b'skipread'] = set()
3009 state[b'safe_renamed'] = set()
3009 state[b'safe_renamed'] = set()
3010
3010
3011 for rev in self:
3011 for rev in self:
3012 node = self.node(rev)
3012 node = self.node(rev)
3013
3013
3014 # Verify contents. 4 cases to care about:
3014 # Verify contents. 4 cases to care about:
3015 #
3015 #
3016 # common: the most common case
3016 # common: the most common case
3017 # rename: with a rename
3017 # rename: with a rename
3018 # meta: file content starts with b'\1\n', the metadata
3018 # meta: file content starts with b'\1\n', the metadata
3019 # header defined in filelog.py, but without a rename
3019 # header defined in filelog.py, but without a rename
3020 # ext: content stored externally
3020 # ext: content stored externally
3021 #
3021 #
3022 # More formally, their differences are shown below:
3022 # More formally, their differences are shown below:
3023 #
3023 #
3024 # | common | rename | meta | ext
3024 # | common | rename | meta | ext
3025 # -------------------------------------------------------
3025 # -------------------------------------------------------
3026 # flags() | 0 | 0 | 0 | not 0
3026 # flags() | 0 | 0 | 0 | not 0
3027 # renamed() | False | True | False | ?
3027 # renamed() | False | True | False | ?
3028 # rawtext[0:2]=='\1\n'| False | True | True | ?
3028 # rawtext[0:2]=='\1\n'| False | True | True | ?
3029 #
3029 #
3030 # "rawtext" means the raw text stored in revlog data, which
3030 # "rawtext" means the raw text stored in revlog data, which
3031 # could be retrieved by "rawdata(rev)". "text"
3031 # could be retrieved by "rawdata(rev)". "text"
3032 # mentioned below is "revision(rev)".
3032 # mentioned below is "revision(rev)".
3033 #
3033 #
3034 # There are 3 different lengths stored physically:
3034 # There are 3 different lengths stored physically:
3035 # 1. L1: rawsize, stored in revlog index
3035 # 1. L1: rawsize, stored in revlog index
3036 # 2. L2: len(rawtext), stored in revlog data
3036 # 2. L2: len(rawtext), stored in revlog data
3037 # 3. L3: len(text), stored in revlog data if flags==0, or
3037 # 3. L3: len(text), stored in revlog data if flags==0, or
3038 # possibly somewhere else if flags!=0
3038 # possibly somewhere else if flags!=0
3039 #
3039 #
3040 # L1 should be equal to L2. L3 could be different from them.
3040 # L1 should be equal to L2. L3 could be different from them.
3041 # "text" may or may not affect commit hash depending on flag
3041 # "text" may or may not affect commit hash depending on flag
3042 # processors (see flagutil.addflagprocessor).
3042 # processors (see flagutil.addflagprocessor).
3043 #
3043 #
3044 # | common | rename | meta | ext
3044 # | common | rename | meta | ext
3045 # -------------------------------------------------
3045 # -------------------------------------------------
3046 # rawsize() | L1 | L1 | L1 | L1
3046 # rawsize() | L1 | L1 | L1 | L1
3047 # size() | L1 | L2-LM | L1(*) | L1 (?)
3047 # size() | L1 | L2-LM | L1(*) | L1 (?)
3048 # len(rawtext) | L2 | L2 | L2 | L2
3048 # len(rawtext) | L2 | L2 | L2 | L2
3049 # len(text) | L2 | L2 | L2 | L3
3049 # len(text) | L2 | L2 | L2 | L3
3050 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3050 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3051 #
3051 #
3052 # LM: length of metadata, depending on rawtext
3052 # LM: length of metadata, depending on rawtext
3053 # (*): not ideal, see comment in filelog.size
3053 # (*): not ideal, see comment in filelog.size
3054 # (?): could be "- len(meta)" if the resolved content has
3054 # (?): could be "- len(meta)" if the resolved content has
3055 # rename metadata
3055 # rename metadata
3056 #
3056 #
3057 # Checks needed to be done:
3057 # Checks needed to be done:
3058 # 1. length check: L1 == L2, in all cases.
3058 # 1. length check: L1 == L2, in all cases.
3059 # 2. hash check: depending on flag processor, we may need to
3059 # 2. hash check: depending on flag processor, we may need to
3060 # use either "text" (external), or "rawtext" (in revlog).
3060 # use either "text" (external), or "rawtext" (in revlog).
3061
3061
3062 try:
3062 try:
3063 skipflags = state.get(b'skipflags', 0)
3063 skipflags = state.get(b'skipflags', 0)
3064 if skipflags:
3064 if skipflags:
3065 skipflags &= self.flags(rev)
3065 skipflags &= self.flags(rev)
3066
3066
3067 _verify_revision(self, skipflags, state, node)
3067 _verify_revision(self, skipflags, state, node)
3068
3068
3069 l1 = self.rawsize(rev)
3069 l1 = self.rawsize(rev)
3070 l2 = len(self.rawdata(node))
3070 l2 = len(self.rawdata(node))
3071
3071
3072 if l1 != l2:
3072 if l1 != l2:
3073 yield revlogproblem(
3073 yield revlogproblem(
3074 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3074 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3075 node=node,
3075 node=node,
3076 )
3076 )
3077
3077
3078 except error.CensoredNodeError:
3078 except error.CensoredNodeError:
3079 if state[b'erroroncensored']:
3079 if state[b'erroroncensored']:
3080 yield revlogproblem(
3080 yield revlogproblem(
3081 error=_(b'censored file data'), node=node
3081 error=_(b'censored file data'), node=node
3082 )
3082 )
3083 state[b'skipread'].add(node)
3083 state[b'skipread'].add(node)
3084 except Exception as e:
3084 except Exception as e:
3085 yield revlogproblem(
3085 yield revlogproblem(
3086 error=_(b'unpacking %s: %s')
3086 error=_(b'unpacking %s: %s')
3087 % (short(node), stringutil.forcebytestr(e)),
3087 % (short(node), stringutil.forcebytestr(e)),
3088 node=node,
3088 node=node,
3089 )
3089 )
3090 state[b'skipread'].add(node)
3090 state[b'skipread'].add(node)
3091
3091
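# Editor's sketch: hedged consumption of verifyintegrity(); `rl` and a
# prepared `state` dict (as built by the verify machinery) are assumed.
# Each yielded revlogproblem carries `warning`, `error` and `node`
# attributes, any of which may be None.
for problem in rl.verifyintegrity(state):
    if problem.error is not None:
        handle_error(problem.node, problem.error)  # assumed handler
    elif problem.warning is not None:
        handle_warning(problem.node, problem.warning)  # assumed handler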
3092 def storageinfo(
3092 def storageinfo(
3093 self,
3093 self,
3094 exclusivefiles=False,
3094 exclusivefiles=False,
3095 sharedfiles=False,
3095 sharedfiles=False,
3096 revisionscount=False,
3096 revisionscount=False,
3097 trackedsize=False,
3097 trackedsize=False,
3098 storedsize=False,
3098 storedsize=False,
3099 ):
3099 ):
3100 d = {}
3100 d = {}
3101
3101
3102 if exclusivefiles:
3102 if exclusivefiles:
3103 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3103 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3104 if not self._inline:
3104 if not self._inline:
3105 d[b'exclusivefiles'].append((self.opener, self._datafile))
3105 d[b'exclusivefiles'].append((self.opener, self._datafile))
3106
3106
3107 if sharedfiles:
3107 if sharedfiles:
3108 d[b'sharedfiles'] = []
3108 d[b'sharedfiles'] = []
3109
3109
3110 if revisionscount:
3110 if revisionscount:
3111 d[b'revisionscount'] = len(self)
3111 d[b'revisionscount'] = len(self)
3112
3112
3113 if trackedsize:
3113 if trackedsize:
3114 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3114 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3115
3115
3116 if storedsize:
3116 if storedsize:
3117 d[b'storedsize'] = sum(
3117 d[b'storedsize'] = sum(
3118 self.opener.stat(path).st_size for path in self.files()
3118 self.opener.stat(path).st_size for path in self.files()
3119 )
3119 )
3120
3120
3121 return d
3121 return d
3122
3122
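# Editor's sketch: storageinfo() only computes and returns the keys that
# were requested; `rl` is an assumed revlog instance.
info = rl.storageinfo(revisionscount=True, trackedsize=True, storedsize=True)
# e.g. info == {b'revisionscount': 3, b'trackedsize': 2048, b'storedsize': 1536}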
3123 def rewrite_sidedata(self, helpers, startrev, endrev):
3123 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3124 if not self.hassidedata:
3124 if not self.hassidedata:
3125 return
3125 return
3126 # inline revlogs are not yet supported because they suffer from an issue when
3126 # inline revlogs are not yet supported because they suffer from an issue when
3127 # rewriting them (since it's not an append-only operation).
3127 # rewriting them (since it's not an append-only operation).
3128 # See issue6485.
3128 # See issue6485.
3129 assert not self._inline
3129 assert not self._inline
3130 if not helpers[1] and not helpers[2]:
3130 if not helpers[1] and not helpers[2]:
3131 # Nothing to generate or remove
3131 # Nothing to generate or remove
3132 return
3132 return
3133
3133
3134 # the changelog implements a "delayed" writing mechanism that assumes
3134 # the changelog implements a "delayed" writing mechanism that assumes
3135 # all index data is written in append mode and is therefore incompatible
3135 # all index data is written in append mode and is therefore incompatible
3136 # with the seeked writes done in this method. The use of such "delayed"
3136 # with the seeked writes done in this method. The use of such "delayed"
3137 # writing will soon be removed for revlog versions that support side
3137 # writing will soon be removed for revlog versions that support side
3138 # data, so for now we only keep this simple assert to highlight the
3138 # data, so for now we only keep this simple assert to highlight the
3139 # situation.
3139 # situation.
3140 delayed = getattr(self, '_delayed', False)
3140 delayed = getattr(self, '_delayed', False)
3141 diverted = getattr(self, '_divert', False)
3141 diverted = getattr(self, '_divert', False)
3142 if delayed and not diverted:
3142 if delayed and not diverted:
3143 msg = "cannot rewrite_sidedata of a delayed revlog"
3143 msg = "cannot rewrite_sidedata of a delayed revlog"
3144 raise error.ProgrammingError(msg)
3144 raise error.ProgrammingError(msg)
3145
3145
3146 new_entries = []
3146 new_entries = []
3147 # append the new sidedata
3147 # append the new sidedata
3148 with self._datafp(b'a+') as dfh:
3148 with self._datafp(b'a+') as dfh:
3149 # Maybe this bug still exists, see revlog._writeentry
3149 # Maybe this bug still exists, see revlog._writeentry
3150 dfh.seek(0, os.SEEK_END)
3150 dfh.seek(0, os.SEEK_END)
3151 current_offset = dfh.tell()
3151 current_offset = dfh.tell()
3152 for rev in range(startrev, endrev + 1):
3152 for rev in range(startrev, endrev + 1):
3153 entry = self.index[rev]
3153 entry = self.index[rev]
3154 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3154 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3155 store=self,
3155 store=self,
3156 sidedata_helpers=helpers,
3156 sidedata_helpers=helpers,
3157 sidedata={},
3157 sidedata={},
3158 rev=rev,
3158 rev=rev,
3159 )
3159 )
3160
3160
3161 serialized_sidedata = sidedatautil.serialize_sidedata(
3161 serialized_sidedata = sidedatautil.serialize_sidedata(
3162 new_sidedata
3162 new_sidedata
3163 )
3163 )
3164 if entry[8] != 0 or entry[9] != 0:
3164 if entry[8] != 0 or entry[9] != 0:
3165 # rewriting entries that already have sidedata is not
3165 # rewriting entries that already have sidedata is not
3166 # supported yet, because it introduces garbage data in the
3166 # supported yet, because it introduces garbage data in the
3167 # revlog.
3167 # revlog.
3168 msg = b"Rewriting existing sidedata is not supported yet"
3168 msg = b"Rewriting existing sidedata is not supported yet"
3169 raise error.Abort(msg)
3169 raise error.Abort(msg)
3170
3170
3171 # Apply (potential) flags to add and to remove after running
3171 # Apply (potential) flags to add and to remove after running
3172 # the sidedata helpers
3172 # the sidedata helpers
3173 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3173 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3174 entry = (new_offset_flags,) + entry[1:8]
3174 entry = (new_offset_flags,) + entry[1:8]
3175 entry += (current_offset, len(serialized_sidedata))
3175 entry += (current_offset, len(serialized_sidedata))
3176
3176
3177 dfh.write(serialized_sidedata)
3177 dfh.write(serialized_sidedata)
3178 new_entries.append(entry)
3178 new_entries.append(entry)
3179 current_offset += len(serialized_sidedata)
3179 current_offset += len(serialized_sidedata)
3180
3180
3181 # rewrite the new index entries
3181 # rewrite the new index entries
3182 with self._indexfp(b'r+') as ifh:
3182 with self._indexfp(b'r+') as ifh:
3183 ifh.seek(startrev * self.index.entry_size)
3183 ifh.seek(startrev * self.index.entry_size)
3184 for i, e in enumerate(new_entries):
3184 for i, e in enumerate(new_entries):
3185 rev = startrev + i
3185 rev = startrev + i
3186 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3186 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3187 packed = self.index.entry_binary(rev)
3187 packed = self.index.entry_binary(rev)
3188 if rev == 0:
3188 if rev == 0:
3189 header = self._format_flags | self._format_version
3189 header = self._format_flags | self._format_version
3190 header = self.index.pack_header(header)
3190 header = self.index.pack_header(header)
3191 packed = header + packed
3191 packed = header + packed
3192 ifh.write(packed)
3192 ifh.write(packed)
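# Editor's sketch: a hedged call site matching the signature change in
# this commit (the transaction is now passed first). `rl`, `tr` and
# `helpers` (the tuple consumed by run_sidedata_helpers) are assumed,
# and `rl` must not be inline (see the assert above).
rl.rewrite_sidedata(tr, helpers, startrev=0, endrev=len(rl) - 1)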