sidedata: move documentation about sidedata helpers to sidedata module...

Raphaël Gomès
r47849:8bd769b5 default
@@ -1,1943 +1,1946 @@
# changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import struct
import weakref

from .i18n import _
from .node import (
    hex,
    nullrev,
    short,
)
from .pycompat import open

from . import (
    error,
    match as matchmod,
    mdiff,
    phases,
    pycompat,
    requirements,
    scmutil,
    util,
)

from .interfaces import repository
from .revlogutils import sidedata as sidedatamod
from .revlogutils import constants as revlog_constants
from .utils import storageutil

_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
_CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
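
# Editorial sketch (not part of the original source): how the fixed-size
# delta headers above map onto fields, per the _deltaheader() methods of
# the unpacker classes defined below:
#
#   cg1: 20s node | 20s p1 | 20s p2 | 20s cs         (deltabase is implicit)
#   cg2: 20s node | 20s p1 | 20s p2 | 20s deltabase | 20s cs
#   cg3: the cg2 fields followed by an unsigned short of revlog flags
#   cg4: a leading unsigned char of protocol flags, then the cg3 fields
#
# So, hypothetically, unpacking a cg2 header reads as:
#
#   node, p1, p2, deltabase, cs = _CHANGEGROUPV2_DELTA_HEADER.unpack(data)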

LFS_REQUIREMENT = b'lfs'

readexactly = util.readexactly


def getchunk(stream):
    """return the next chunk from stream as a string"""
    d = readexactly(stream, 4)
    l = struct.unpack(b">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_(b"invalid chunk length %d") % l)
        return b""
    return readexactly(stream, l - 4)


def chunkheader(length):
    """return a changegroup chunk header (string)"""
    return struct.pack(b">l", length + 4)


def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    return struct.pack(b">l", 0)
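
# A small worked example (an editorial sketch, not part of the original
# module): each chunk on the wire is a big-endian 32-bit length that
# *includes* the 4 header bytes, followed by the payload. A 5-byte payload
# is therefore framed as:
#
#   chunkheader(5) == struct.pack(b">l", 9) == b'\x00\x00\x00\x09'
#   closechunk()   == b'\x00\x00\x00\x00'    (zero-length terminator)
#
# getchunk() reverses this: it reads the 4-byte length l, treats l == 0 as
# the empty terminator chunk, rejects any other length of 4 or less as
# invalid, and otherwise returns the remaining l - 4 payload bytes.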


def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    return chunkheader(len(path)) + path


def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, b"wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, b"wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
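
# Hypothetical usage sketch (editorial addition): passing filename=None
# spills the stream to a temporary file and returns its generated name,
# which is how a caller obtains an on-disk bundle it did not name:
#
#   tmpname = writechunks(ui, unpacker.getchunks(), None)
#
# If the write fails partway through, the `finally` clause unlinks the
# partially written temporary file before the exception propagates.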


class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def deltachunk(self, prevnode):
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        header = self._deltaheader(header, prevnode)
        node, p1, p2, deltabase, cs, flags, protocol_flags = header
        return node, p1, p2, cs, deltabase, delta, flags, protocol_flags

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block on an sshrepo because it doesn't know where the stream
        ends.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()
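
    # A rough layout sketch (editorial, not in the original source) of the
    # chunk stream that getchunks() re-emits for cg1/cg2:
    #
    #   <changelog chunks...> <empty chunk>
    #   <manifestlog chunks...> <empty chunk>
    #   for each file: <file header chunk> <delta chunks...> <empty chunk>
    #   <empty chunk>   # terminates the file list
    #
    # cg3/cg4 insert an analogous tree-manifest list between the manifests
    # and the files, hence _grouplistcount = 2 on cg3unpacker below.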

    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        storage = repo.manifestlog.getstorage(b'')
        storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
        sidedata_categories=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1

        `sidedata_categories` is an optional set of the remote's sidedata wanted
        categories.
        """
        repo = repo.unfiltered()

        # Only useful if we're adding sidedata categories. If both peers have
        # the same categories, then we simply don't do anything.
        adding_sidedata = (
            requirements.REVLOGV2_REQUIREMENT in repo.requirements
            and self.version == b'04'
            and srctype == b'pull'
        )
        if adding_sidedata:
            sidedata_helpers = sidedatamod.get_sidedata_helpers(
                repo,
                sidedata_categories or set(),
                pull=True,
            )
        else:
            sidedata_helpers = None

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if it exists)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            duprevs = []

            def ondupchangelog(cl, rev):
                if rev < clstart:
                    duprevs.append(rev)

            def onchangelog(cl, rev):
                ctx = cl.changelogrevision(rev)
                efilesset.update(ctx.files)
                repo.register_changeset(rev, ctx)

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                alwayscache=True,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # Keep track of the (non-changelog) revlogs we've updated and their
            # range of new revisions for sidedata rewrite.
            # TODO do something more efficient than keeping the reference to
            # the revlogs, especially memory-wise.
            touched_manifests = {}
            touched_filelogs = {}

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            on_manifest_rev = None
            if sidedata_helpers:
                if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:

                    def on_manifest_rev(manifest, rev):
                        range = touched_manifests.get(manifest)
                        if not range:
                            touched_manifests[manifest] = (rev, rev)
                        else:
                            assert rev == range[1] + 1
                            touched_manifests[manifest] = (range[0], rev)

            self._unpackmanifests(
                repo,
                revmap,
                trp,
                progress,
                addrevisioncb=on_manifest_rev,
            )

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            on_filelog_rev = None
            if sidedata_helpers:
                if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:

                    def on_filelog_rev(filelog, rev):
                        range = touched_filelogs.get(filelog)
                        if not range:
                            touched_filelogs[filelog] = (rev, rev)
                        else:
                            assert rev == range[1] + 1
                            touched_filelogs[filelog] = (range[0], rev)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo,
                self,
                revmap,
                trp,
                efiles,
                needfiles,
                addrevisioncb=on_filelog_rev,
            )

            if sidedata_helpers:
                if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
                    cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
                for mf, (startrev, endrev) in touched_manifests.items():
                    mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
                for fl, (startrev, endrev) in touched_filelogs.items():
                    fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # Some code uses bundle operations for internal purposes. It
            # usually sets `ui.quiet` to keep those operations out of the
            # user's sight. Since the reporting of such operations now
            # happens at the end of the transaction, `ui.quiet` has no
            # direct effect on the output anymore.
            #
            # To preserve this intent we use an inelegant hack: we fail to
            # report the change if `quiet` is set. We should probably move
            # to something better, but this is a good first step to allow
            # the "end of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all changes
                # in the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                if duprevs:
                    duprevs.extend(added)
                else:
                    duprevs = added
                phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
                duprevs = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
            yield chunkdata
            chain = chunkdata[0]
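
    # Hypothetical consumption sketch (editorial addition): deltaiter() is
    # what gets handed to a revlog's addgroup(), as in _unpackmanifests()
    # above:
    #
    #   deltas = self.deltaiter()
    #   storage.addgroup(deltas, revmap, trp)
    #
    # The `chain` variable threads the previously seen node back into
    # deltachunk(), which is how cg1 resolves deltas made against the
    # previous revision in the stream.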


class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags


class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs, flags = headertuple
        protocol_flags = 0
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
        super(cg3unpacker, self)._unpackmanifests(
            repo, revmap, trp, prog, addrevisioncb=addrevisioncb
        )
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(
                deltas, revmap, trp, addrevisioncb=addrevisioncb
            ):
                raise error.Abort(_(b"received dir revlog group is empty"))


class cg4unpacker(cg3unpacker):
    """Unpacker for cg4 streams.

    cg4 streams add support for exchanging sidedata.
    """

    deltaheader = _CHANGEGROUPV4_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'04'

    def _deltaheader(self, headertuple, prevnode):
        protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags, protocol_flags

    def deltachunk(self, prevnode):
        res = super(cg4unpacker, self).deltachunk(prevnode)
        if not res:
            return res

        (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res

        sidedata = {}
        if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
            sidedata_raw = getchunk(self._stream)
            sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)

        return node, p1, p2, cs, deltabase, delta, flags, sidedata


class headerlessfixup(object):
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)


def _revisiondeltatochunks(repo, delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == repo.nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data

    if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
        # Need a separate chunk for sidedata to be able to differentiate
        # "raw delta" length and sidedata length
        sidedata = delta.sidedata
        yield chunkheader(len(sidedata))
        yield sidedata
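
# An editorial sketch (not part of the original source) of what
# _revisiondeltatochunks() puts on the wire for one revision:
#
#   chunkheader(len(meta) + len(prefix) + len(data))
#   meta    - the delta header built by headerfn()
#   prefix  - empty for a real delta; otherwise an mdiff header that
#             rewrites the full range of the base revision
#   data    - the delta body (or the full revision text)
#   [chunkheader(len(sidedata)) + sidedata]  - only when the
#             CG_FLAG_SIDEDATA protocol flag is set (cg4)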
713
713
714
714
715 def _sortnodesellipsis(store, nodes, cl, lookup):
715 def _sortnodesellipsis(store, nodes, cl, lookup):
716 """Sort nodes for changegroup generation."""
716 """Sort nodes for changegroup generation."""
717 # Ellipses serving mode.
717 # Ellipses serving mode.
718 #
718 #
719 # In a perfect world, we'd generate better ellipsis-ified graphs
719 # In a perfect world, we'd generate better ellipsis-ified graphs
720 # for non-changelog revlogs. In practice, we haven't started doing
720 # for non-changelog revlogs. In practice, we haven't started doing
721 # that yet, so the resulting DAGs for the manifestlog and filelogs
721 # that yet, so the resulting DAGs for the manifestlog and filelogs
722 # are actually full of bogus parentage on all the ellipsis
722 # are actually full of bogus parentage on all the ellipsis
723 # nodes. This has the side effect that, while the contents are
723 # nodes. This has the side effect that, while the contents are
724 # correct, the individual DAGs might be completely out of whack in
724 # correct, the individual DAGs might be completely out of whack in
725 # a case like 882681bc3166 and its ancestors (back about 10
725 # a case like 882681bc3166 and its ancestors (back about 10
726 # revisions or so) in the main hg repo.
726 # revisions or so) in the main hg repo.
727 #
727 #
728 # The one invariant we *know* holds is that the new (potentially
728 # The one invariant we *know* holds is that the new (potentially
729 # bogus) DAG shape will be valid if we order the nodes in the
729 # bogus) DAG shape will be valid if we order the nodes in the
730 # order that they're introduced in dramatis personae by the
730 # order that they're introduced in dramatis personae by the
731 # changelog, so what we do is we sort the non-changelog histories
731 # changelog, so what we do is we sort the non-changelog histories
732 # by the order in which they are used by the changelog.
732 # by the order in which they are used by the changelog.
733 key = lambda n: cl.rev(lookup(n))
733 key = lambda n: cl.rev(lookup(n))
734 return sorted(nodes, key=key)
734 return sorted(nodes, key=key)
735
735
736
736
737 def _resolvenarrowrevisioninfo(
737 def _resolvenarrowrevisioninfo(
738 cl,
738 cl,
739 store,
739 store,
740 ischangelog,
740 ischangelog,
741 rev,
741 rev,
742 linkrev,
742 linkrev,
743 linknode,
743 linknode,
744 clrevtolocalrev,
744 clrevtolocalrev,
745 fullclnodes,
745 fullclnodes,
746 precomputedellipsis,
746 precomputedellipsis,
747 ):
747 ):
748 linkparents = precomputedellipsis[linkrev]
748 linkparents = precomputedellipsis[linkrev]
749
749
750 def local(clrev):
750 def local(clrev):
751 """Turn a changelog revnum into a local revnum.
751 """Turn a changelog revnum into a local revnum.
752
752
753 The ellipsis dag is stored as revnums on the changelog,
753 The ellipsis dag is stored as revnums on the changelog,
754 but when we're producing ellipsis entries for
754 but when we're producing ellipsis entries for
755 non-changelog revlogs, we need to turn those numbers into
755 non-changelog revlogs, we need to turn those numbers into
756 something local. This does that for us, and during the
756 something local. This does that for us, and during the
757 changelog sending phase will also expand the stored
757 changelog sending phase will also expand the stored
758 mappings as needed.
758 mappings as needed.
759 """
759 """
760 if clrev == nullrev:
760 if clrev == nullrev:
761 return nullrev
761 return nullrev
762
762
763 if ischangelog:
763 if ischangelog:
764 return clrev
764 return clrev
765
765
766 # Walk the ellipsis-ized changelog breadth-first looking for a
766 # Walk the ellipsis-ized changelog breadth-first looking for a
767 # change that has been linked from the current revlog.
767 # change that has been linked from the current revlog.
768 #
768 #
769 # For a flat manifest revlog only a single step should be necessary
769 # For a flat manifest revlog only a single step should be necessary
770 # as all relevant changelog entries are relevant to the flat
770 # as all relevant changelog entries are relevant to the flat
771 # manifest.
771 # manifest.
772 #
772 #
773 # For a filelog or tree manifest dirlog however not every changelog
773 # For a filelog or tree manifest dirlog however not every changelog
774 # entry will have been relevant, so we need to skip some changelog
774 # entry will have been relevant, so we need to skip some changelog
775 # nodes even after ellipsis-izing.
775 # nodes even after ellipsis-izing.
776 walk = [clrev]
776 walk = [clrev]
777 while walk:
777 while walk:
778 p = walk[0]
778 p = walk[0]
779 walk = walk[1:]
779 walk = walk[1:]
780 if p in clrevtolocalrev:
780 if p in clrevtolocalrev:
781 return clrevtolocalrev[p]
781 return clrevtolocalrev[p]
782 elif p in fullclnodes:
782 elif p in fullclnodes:
783 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
783 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
784 elif p in precomputedellipsis:
784 elif p in precomputedellipsis:
785 walk.extend(
785 walk.extend(
786 [pp for pp in precomputedellipsis[p] if pp != nullrev]
786 [pp for pp in precomputedellipsis[p] if pp != nullrev]
787 )
787 )
788 else:
788 else:
789 # In this case, we've got an ellipsis with parents
789 # In this case, we've got an ellipsis with parents
790 # outside the current bundle (likely an
790 # outside the current bundle (likely an
791 # incremental pull). We "know" that we can use the
791 # incremental pull). We "know" that we can use the
792 # value of this same revlog at whatever revision
792 # value of this same revlog at whatever revision
793 # is pointed to by linknode. "Know" is in scare
793 # is pointed to by linknode. "Know" is in scare
794 # quotes because I haven't done enough examination
794 # quotes because I haven't done enough examination
795 # of edge cases to convince myself this is really
795 # of edge cases to convince myself this is really
796 # a fact - it works for all the (admittedly
796 # a fact - it works for all the (admittedly
797 # thorough) cases in our testsuite, but I would be
797 # thorough) cases in our testsuite, but I would be
798 # somewhat unsurprised to find a case in the wild
798 # somewhat unsurprised to find a case in the wild
799 # where this breaks down a bit. That said, I don't
799 # where this breaks down a bit. That said, I don't
800 # know if it would hurt anything.
800 # know if it would hurt anything.
801 for i in pycompat.xrange(rev, 0, -1):
801 for i in pycompat.xrange(rev, 0, -1):
802 if store.linkrev(i) == clrev:
802 if store.linkrev(i) == clrev:
803 return i
803 return i
804 # We failed to resolve a parent for this node, so
804 # We failed to resolve a parent for this node, so
805 # we crash the changegroup construction.
805 # we crash the changegroup construction.
806 raise error.Abort(
806 raise error.Abort(
807 b"unable to resolve parent while packing '%s' %r"
807 b"unable to resolve parent while packing '%s' %r"
808 b' for changeset %r' % (store.indexfile, rev, clrev)
808 b' for changeset %r' % (store.indexfile, rev, clrev)
809 )
809 )
810
810
811 return nullrev
811 return nullrev
812
812
813 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
813 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
814 p1, p2 = nullrev, nullrev
814 p1, p2 = nullrev, nullrev
815 elif len(linkparents) == 1:
815 elif len(linkparents) == 1:
816 (p1,) = sorted(local(p) for p in linkparents)
816 (p1,) = sorted(local(p) for p in linkparents)
817 p2 = nullrev
817 p2 = nullrev
818 else:
818 else:
819 p1, p2 = sorted(local(p) for p in linkparents)
819 p1, p2 = sorted(local(p) for p in linkparents)
820
820
821 p1node, p2node = store.node(p1), store.node(p2)
821 p1node, p2node = store.node(p1), store.node(p2)
822
822
823 return p1node, p2node, linknode
823 return p1node, p2node, linknode
824
824
825
825
826 def deltagroup(
826 def deltagroup(
827 repo,
827 repo,
828 store,
828 store,
829 nodes,
829 nodes,
830 ischangelog,
830 ischangelog,
831 lookup,
831 lookup,
832 forcedeltaparentprev,
832 forcedeltaparentprev,
833 topic=None,
833 topic=None,
834 ellipses=False,
834 ellipses=False,
835 clrevtolocalrev=None,
835 clrevtolocalrev=None,
836 fullclnodes=None,
836 fullclnodes=None,
837 precomputedellipsis=None,
837 precomputedellipsis=None,
838 sidedata_helpers=None,
838 sidedata_helpers=None,
839 ):
839 ):
840 """Calculate deltas for a set of revisions.
840 """Calculate deltas for a set of revisions.
841
841
842 Is a generator of ``revisiondelta`` instances.
842 Is a generator of ``revisiondelta`` instances.
843
843
844 If topic is not None, progress detail will be generated using this
844 If topic is not None, progress detail will be generated using this
845 topic name (e.g. changesets, manifests, etc).
845 topic name (e.g. changesets, manifests, etc).
846
846
847 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
847 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
848 `sidedata_helpers`.
848 """
849 """
849 if not nodes:
850 if not nodes:
850 return
851 return
851
852
852 cl = repo.changelog
    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

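    # Editorial note (not part of the original function): as a concrete
    # example of the mapping above, a command line such as
    #
    #     hg bundle --all out.hg --config devel.bundle.delta=full
    #
    # would select CG_DELTAMODE_FULL and store every revision as a full
    # snapshot, while devel.bundle.delta=p1 forces deltas against the
    # first parent. Both are debug knobs, hence the 'devel' section.
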
    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
        sidedata_helpers=sidedata_helpers,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()


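# Editorial note (not part of the original module): deltagroup() yields
# revisiondelta objects with linknode already resolved, so a consumer can
# be as simple as (process() is a hypothetical stand-in):
#
#     for delta in deltagroup(repo, store, nodes, False, lookup, False):
#         process(delta.node, delta.linknode, delta.flags)
#
# In this module the real consumer is _revisiondeltatochunks(), driven by
# cgpacker.generate() below.

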
class cgpacker(object):
    def __init__(
        self,
        repo,
        oldmatcher,
        matcher,
        version,
        builddeltaheader,
        manifestsend,
        forcedeltaparentprev=False,
        bundlecaps=None,
        ellipses=False,
        shallow=False,
        ellipsisroots=None,
        fullnodes=None,
        remote_sidedata=None,
    ):
        """Given a source repo, construct a bundler.

        oldmatcher is a matcher that matches on files the client already has.
        These will not be included in the changegroup.

        matcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities available to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to
        communicate capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.

        remote_sidedata is the set of sidedata categories wanted by the remote.
        """
        assert oldmatcher
        assert matcher
        self._oldmatcher = oldmatcher
        self._matcher = matcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        if remote_sidedata is None:
            remote_sidedata = set()
        self._remote_sidedata = remote_sidedata
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.

        If changelog is False, changelog data won't be added to the
        changegroup.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        sidedata_helpers = None
        if self.version == b'04':
            remote_sidedata = self._remote_sidedata
            if source == b'strip':
                # We're our own remote when stripping, get the no-op helpers
                # TODO a better approach would be for the strip bundle to
                # correctly advertise its sidedata categories directly.
                remote_sidedata = repo._wanted_sidedata
            sidedata_helpers = sidedatamod.get_sidedata_helpers(
                repo, remote_sidedata
            )

        clstate, deltas = self._generatechangelog(
            cl,
            clnodes,
            generate=changelog,
            sidedata_helpers=sidedata_helpers,
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(
                self._repo, delta, self._builddeltaheader
            ):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield close

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
            sidedata_helpers=sidedata_helpers,
        )

        for tree, deltas in it:
            if tree:
                assert self.version in (b'03', b'04')
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(
                    self._repo, delta, self._builddeltaheader
                )
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
            sidedata_helpers=sidedata_helpers,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(
                    self._repo, delta, self._builddeltaheader
                )
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)

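    # Editorial note (not part of the original class): callers drain
    # generate() and concatenate its chunks into a bundle stream, e.g.
    # (packer comes from getbundler(), outgoing from discovery code):
    #
    #     stream = packer.generate(
    #         outgoing.common, outgoing.missing, False, b'push'
    #     )
    #     data = b''.join(stream)
    #
    # makestream() at the bottom of this module wraps exactly this call.
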
    def _generatechangelog(
        self, cl, nodes, generate=True, sidedata_helpers=None
    ):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        If generate is False, the state will be fully populated and no
        chunk stream will be yielded.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):
                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
            sidedata_helpers=sidedata_helpers,
        )

        return state, gen

    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
        sidedata_helpers=None,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent based on pulls vs pushes, etc.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        repo = self._repo
        mfl = repo.manifestlog
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree

                # pytype: disable=unsupported-operands
                return manifests.__getitem__
                # pytype: enable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
            if not tree:
                yield tree, []

    def _prunemanifests(self, store, nodes, commonrevs):
        if not self._ellipses:
            # In the non-ellipses case, for large repositories it is better
            # to avoid calling store.rev and store.linkrev on a lot of nodes
            # than to save sending some extra data
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
        sidedata_helpers=None,
    ):
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)

            # Lookup table for filenodes; we collected the linkrev nodes
            # above in the fastpath case and with lookupmf in the slowpath
            # case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            yield fname, deltas

        progress.complete()


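# Editorial note (not part of the original module): the four factory
# functions below differ mainly in the delta header they build. cg1 has no
# basenode field, which is why it must force deltas against the previous
# revision (forcedeltaparentprev=True); cg2 adds basenode, cg3 adds flags,
# and cg4 prepends protocol_flags. A hypothetical reader of a cg2 header
# would recover the five 20-byte nodes symmetrically, e.g.:
#
#     node, p1, p2, base, link = struct.unpack(b"20s20s20s20s20s", hdr)
#
# (illustrative only; the real unpackers share the module-level struct
# constants with the packers).

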
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg4packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    # Sidedata is in a separate chunk from the delta to differentiate
    # "raw delta" and sidedata.
    def builddeltaheader(d):
        return _CHANGEGROUPV4_DELTA_HEADER.pack(
            d.protocol_flags,
            d.node,
            d.p1node,
            d.p2node,
            d.basenode,
            d.linknode,
            d.flags,
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'04',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
    # cg4 adds support for exchanging sidedata
    b'04': (_makecg4packer, cg4unpacker),
}


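# Editorial note (not part of the original module): _packermap is the
# single dispatch point for changegroup versions; getbundler() and
# getunbundler() below reduce to lookups of this shape:
#
#     makepacker, unpackercls = _packermap[b'02']
#     packer = makepacker(repo, oldmatcher, matcher, bundlecaps)
#     unbundler = unpackercls(fh, b'UN')
#
# b'UN' (no compression) is an assumed algorithm name here, based on how
# the unpacker constructors defined earlier in this file are commonly fed.

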
def allsupportedversions(repo):
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for a repo
        # to contain both normal and tree manifests at the same time, so
        # using an older version to pull data is viable
        #
        # (or even to push a subset of history)
        needv03 = True
    if not needv03:
        versions.discard(b'03')
    want_v4 = (
        repo.ui.configbool(b'experimental', b'changegroup4')
        or requirements.REVLOGV2_REQUIREMENT in repo.requirements
    )
    if not want_v4:
        versions.discard(b'04')
    return versions


# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    return allsupportedversions(repo)


# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions


def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))


def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)


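# Editorial note (not part of the original module): putting the helpers
# above together, a caller picks a version roughly like this:
#
#     version = localversion(repo)  # newest the local repo can produce
#     version = safeversion(repo)   # oldest any expected client accepts
#
# and then feeds the choice to getbundler()/makestream() below.

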
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


def getunbundler(version, fh, alg, extras=None):
    return _packermap[version][1](fh, alg, extras=extras)


def _changegroupinfo(repo, nodes, source):
    if repo.ui.verbose or source == b'bundle':
        repo.ui.status(_(b"%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug(b"list of changesets:\n")
        for node in nodes:
            repo.ui.debug(b"%s\n" % hex(node))


def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    return getunbundler(
        version,
        util.chunkbuffer(cgstream),
        None,
        {b'clcount': len(outgoing.missing)},
    )


1852 repo,
1855 repo,
1853 outgoing,
1856 outgoing,
1854 version,
1857 version,
1855 source,
1858 source,
1856 fastpath=False,
1859 fastpath=False,
1857 bundlecaps=None,
1860 bundlecaps=None,
1858 matcher=None,
1861 matcher=None,
1859 remote_sidedata=None,
1862 remote_sidedata=None,
1860 ):
1863 ):
1861 bundler = getbundler(
1864 bundler = getbundler(
1862 version,
1865 version,
1863 repo,
1866 repo,
1864 bundlecaps=bundlecaps,
1867 bundlecaps=bundlecaps,
1865 matcher=matcher,
1868 matcher=matcher,
1866 remote_sidedata=remote_sidedata,
1869 remote_sidedata=remote_sidedata,
1867 )
1870 )
1868
1871
1869 repo = repo.unfiltered()
1872 repo = repo.unfiltered()
1870 commonrevs = outgoing.common
1873 commonrevs = outgoing.common
1871 csets = outgoing.missing
1874 csets = outgoing.missing
1872 heads = outgoing.ancestorsof
1875 heads = outgoing.ancestorsof
1873 # We go through the fast path if we get told to, or if all (unfiltered
1876 # We go through the fast path if we get told to, or if all (unfiltered
1874 # heads have been requested (since we then know there all linkrevs will
1877 # heads have been requested (since we then know there all linkrevs will
1875 # be pulled by the client).
1878 # be pulled by the client).
1876 heads.sort()
1879 heads.sort()
1877 fastpathlinkrev = fastpath or (
1880 fastpathlinkrev = fastpath or (
1878 repo.filtername is None and heads == sorted(repo.heads())
1881 repo.filtername is None and heads == sorted(repo.heads())
1879 )
1882 )
1880
1883
1881 repo.hook(b'preoutgoing', throw=True, source=source)
1884 repo.hook(b'preoutgoing', throw=True, source=source)
1882 _changegroupinfo(repo, csets, source)
1885 _changegroupinfo(repo, csets, source)
1883 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1886 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1884
1887
1885
1888
1886 def _addchangegroupfiles(
1889 def _addchangegroupfiles(
1887 repo,
1890 repo,
1888 source,
1891 source,
1889 revmap,
1892 revmap,
1890 trp,
1893 trp,
1891 expectedfiles,
1894 expectedfiles,
1892 needfiles,
1895 needfiles,
1893 addrevisioncb=None,
1896 addrevisioncb=None,
1894 ):
1897 ):
1895 revisions = 0
1898 revisions = 0
1896 files = 0
1899 files = 0
1897 progress = repo.ui.makeprogress(
1900 progress = repo.ui.makeprogress(
1898 _(b'files'), unit=_(b'files'), total=expectedfiles
1901 _(b'files'), unit=_(b'files'), total=expectedfiles
1899 )
1902 )
1900 for chunkdata in iter(source.filelogheader, {}):
1903 for chunkdata in iter(source.filelogheader, {}):
1901 files += 1
1904 files += 1
1902 f = chunkdata[b"filename"]
1905 f = chunkdata[b"filename"]
1903 repo.ui.debug(b"adding %s revisions\n" % f)
1906 repo.ui.debug(b"adding %s revisions\n" % f)
1904 progress.increment()
1907 progress.increment()
1905 fl = repo.file(f)
1908 fl = repo.file(f)
1906 o = len(fl)
1909 o = len(fl)
1907 try:
1910 try:
1908 deltas = source.deltaiter()
1911 deltas = source.deltaiter()
1909 added = fl.addgroup(
1912 added = fl.addgroup(
1910 deltas,
1913 deltas,
1911 revmap,
1914 revmap,
1912 trp,
1915 trp,
1913 addrevisioncb=addrevisioncb,
1916 addrevisioncb=addrevisioncb,
1914 )
1917 )
1915 if not added:
1918 if not added:
1916 raise error.Abort(_(b"received file revlog group is empty"))
1919 raise error.Abort(_(b"received file revlog group is empty"))
1917 except error.CensoredBaseError as e:
1920 except error.CensoredBaseError as e:
1918 raise error.Abort(_(b"received delta base is censored: %s") % e)
1921 raise error.Abort(_(b"received delta base is censored: %s") % e)
1919 revisions += len(fl) - o
1922 revisions += len(fl) - o
1920 if f in needfiles:
1923 if f in needfiles:
1921 needs = needfiles[f]
1924 needs = needfiles[f]
1922 for new in pycompat.xrange(o, len(fl)):
1925 for new in pycompat.xrange(o, len(fl)):
1923 n = fl.node(new)
1926 n = fl.node(new)
1924 if n in needs:
1927 if n in needs:
1925 needs.remove(n)
1928 needs.remove(n)
1926 else:
1929 else:
1927 raise error.Abort(_(b"received spurious file revlog entry"))
1930 raise error.Abort(_(b"received spurious file revlog entry"))
1928 if not needs:
1931 if not needs:
1929 del needfiles[f]
1932 del needfiles[f]
1930 progress.complete()
1933 progress.complete()
1931
1934
1932 for f, needs in pycompat.iteritems(needfiles):
1935 for f, needs in pycompat.iteritems(needfiles):
1933 fl = repo.file(f)
1936 fl = repo.file(f)
1934 for n in needs:
1937 for n in needs:
1935 try:
1938 try:
1936 fl.rev(n)
1939 fl.rev(n)
1937 except error.LookupError:
1940 except error.LookupError:
1938 raise error.Abort(
1941 raise error.Abort(
1939 _(b'missing file data for %s:%s - run hg verify')
1942 _(b'missing file data for %s:%s - run hg verify')
1940 % (f, hex(n))
1943 % (f, hex(n))
1941 )
1944 )
1942
1945
1943 return revisions, files
1946 return revisions, files
@@ -1,3129 +1,3130 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the names to silence pyflakes unused-import warnings
86 # blanked usage of all the names to silence pyflakes unused-import warnings
87 # We need these names available in the module for extensions.
87 # We need these names available in the module for extensions.
88
88
89 REVLOGV0
89 REVLOGV0
90 REVLOGV1
90 REVLOGV1
91 REVLOGV2
91 REVLOGV2
92 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
93 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
97 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
98 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
99 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
100 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def offset_type(offset, type):
139 def offset_type(offset, type):
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
142 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
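# For example (illustrative): offset_type(1024, 0) == 1024 << 16, while
# offset_type(0, REVIDX_ISCENSORED) keeps the flag bits in the low 16
# bits; the start() and flags() accessors further down undo this packing.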
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
155 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
156 #
156 #
157 # We also consider the "pure" python implementation to be "fast" because
157 # We also consider the "pure" python implementation to be "fast" because
158 # people using pure don't really have performance considerations (and a
158 # people using pure don't really have performance considerations (and a
159 # wheelbarrow of other slowness sources)
159 # wheelbarrow of other slowness sources)
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
162 )
162 )
163
163
164
164
165 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
166 class _revisioninfo(object):
166 class _revisioninfo(object):
167 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
168 node: expected hash of the revision
168 node: expected hash of the revision
169 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
170 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
171 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
172 flags: flags associated to the revision storage
172 flags: flags associated to the revision storage
173
173
174 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
175 """
175 """
176
176
177 node = attr.ib()
177 node = attr.ib()
178 p1 = attr.ib()
178 p1 = attr.ib()
179 p2 = attr.ib()
179 p2 = attr.ib()
180 btext = attr.ib()
180 btext = attr.ib()
181 textlen = attr.ib()
181 textlen = attr.ib()
182 cachedelta = attr.ib()
182 cachedelta = attr.ib()
183 flags = attr.ib()
183 flags = attr.ib()
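# Illustrative construction (all values assumed): a revision supplied as
# a delta against `baserev` rather than a prebuilt fulltext, so btext[0]
# stays None and cachedelta is set instead.
#
# info = _revisioninfo(node=expectednode, p1=p1rev, p2=p2rev,
#                      btext=[None], textlen=fulltextlen,
#                      cachedelta=(baserev, delta), flags=0)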
184
184
185
185
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
187 @attr.s(slots=True)
187 @attr.s(slots=True)
188 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
189 node = attr.ib()
189 node = attr.ib()
190 p1node = attr.ib()
190 p1node = attr.ib()
191 p2node = attr.ib()
191 p2node = attr.ib()
192 basenode = attr.ib()
192 basenode = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
195 revision = attr.ib()
195 revision = attr.ib()
196 delta = attr.ib()
196 delta = attr.ib()
197 sidedata = attr.ib()
197 sidedata = attr.ib()
198 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
199 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
200
200
201
201
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
203 @attr.s(frozen=True)
203 @attr.s(frozen=True)
204 class revlogproblem(object):
204 class revlogproblem(object):
205 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
206 error = attr.ib(default=None)
206 error = attr.ib(default=None)
207 node = attr.ib(default=None)
207 node = attr.ib(default=None)
208
208
209
209
210 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
213 return index, cache
213 return index, cache
214
214
215
215
216 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
217 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 return index, cache
219 return index, cache
220
220
221
221
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223
223
224 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 return index, cache
226 return index, cache
227
227
228
228
229 else:
229 else:
230 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
231
231
232
232
233 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
234 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
235 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
236
236
237
237
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
239 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
288 def __init__(
288 def __init__(
289 self,
289 self,
290 opener,
290 opener,
291 target,
291 target,
292 indexfile=None,
292 indexfile=None,
293 datafile=None,
293 datafile=None,
294 checkambig=False,
294 checkambig=False,
295 mmaplargeindex=False,
295 mmaplargeindex=False,
296 censorable=False,
296 censorable=False,
297 upperboundcomp=None,
297 upperboundcomp=None,
298 persistentnodemap=False,
298 persistentnodemap=False,
299 concurrencychecker=None,
299 concurrencychecker=None,
300 ):
300 ):
301 """
301 """
302 create a revlog object
302 create a revlog object
303
303
304 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
306
306
307 `target`: a (KIND, ID) tuple that identifies the content stored in
307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 this revlog. It helps the rest of the code understand what the revlog
308 this revlog. It helps the rest of the code understand what the revlog
309 is about without having to resort to heuristics and index filename
309 is about without having to resort to heuristics and index filename
310 analysis. Note that this must reliably be set by normal code, but
310 analysis. Note that this must reliably be set by normal code, but
311 test, debug, or performance measurement code might not set it to an
311 test, debug, or performance measurement code might not set it to an
312 accurate value.
312 accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315 self.indexfile = indexfile
315 self.indexfile = indexfile
316 self.datafile = datafile or (indexfile[:-2] + b".d")
316 self.datafile = datafile or (indexfile[:-2] + b".d")
317 self.nodemap_file = None
317 self.nodemap_file = None
318 if persistentnodemap:
318 if persistentnodemap:
319 self.nodemap_file = nodemaputil.get_nodemap_file(
319 self.nodemap_file = nodemaputil.get_nodemap_file(
320 opener, self.indexfile
320 opener, self.indexfile
321 )
321 )
322
322
323 self.opener = opener
323 self.opener = opener
324 assert target[0] in ALL_KINDS
324 assert target[0] in ALL_KINDS
325 assert len(target) == 2
325 assert len(target) == 2
326 self.target = target
326 self.target = target
327 # When True, indexfile is opened with checkambig=True at writing, to
327 # When True, indexfile is opened with checkambig=True at writing, to
328 # avoid file stat ambiguity.
328 # avoid file stat ambiguity.
329 self._checkambig = checkambig
329 self._checkambig = checkambig
330 self._mmaplargeindex = mmaplargeindex
330 self._mmaplargeindex = mmaplargeindex
331 self._censorable = censorable
331 self._censorable = censorable
332 # 3-tuple of (node, rev, text) for a raw revision.
332 # 3-tuple of (node, rev, text) for a raw revision.
333 self._revisioncache = None
333 self._revisioncache = None
334 # Maps rev to chain base rev.
334 # Maps rev to chain base rev.
335 self._chainbasecache = util.lrucachedict(100)
335 self._chainbasecache = util.lrucachedict(100)
336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 self._chunkcache = (0, b'')
337 self._chunkcache = (0, b'')
338 # How much data to read and cache into the raw revlog data cache.
338 # How much data to read and cache into the raw revlog data cache.
339 self._chunkcachesize = 65536
339 self._chunkcachesize = 65536
340 self._maxchainlen = None
340 self._maxchainlen = None
341 self._deltabothparents = True
341 self._deltabothparents = True
342 self.index = None
342 self.index = None
343 self._nodemap_docket = None
343 self._nodemap_docket = None
344 # Mapping of partial identifiers to full nodes.
344 # Mapping of partial identifiers to full nodes.
345 self._pcache = {}
345 self._pcache = {}
346 # Mapping of revision integer to full node.
346 # Mapping of revision integer to full node.
347 self._compengine = b'zlib'
347 self._compengine = b'zlib'
348 self._compengineopts = {}
348 self._compengineopts = {}
349 self._maxdeltachainspan = -1
349 self._maxdeltachainspan = -1
350 self._withsparseread = False
350 self._withsparseread = False
351 self._sparserevlog = False
351 self._sparserevlog = False
352 self._srdensitythreshold = 0.50
352 self._srdensitythreshold = 0.50
353 self._srmingapsize = 262144
353 self._srmingapsize = 262144
354
354
355 # Make copy of flag processors so each revlog instance can support
355 # Make copy of flag processors so each revlog instance can support
356 # custom flags.
356 # custom flags.
357 self._flagprocessors = dict(flagutil.flagprocessors)
357 self._flagprocessors = dict(flagutil.flagprocessors)
358
358
359 # 2-tuple of file handles being used for active writing.
359 # 2-tuple of file handles being used for active writing.
360 self._writinghandles = None
360 self._writinghandles = None
361
361
362 self._loadindex()
362 self._loadindex()
363
363
364 self._concurrencychecker = concurrencychecker
364 self._concurrencychecker = concurrencychecker
365
365
366 def _loadindex(self):
366 def _loadindex(self):
367 mmapindexthreshold = None
367 mmapindexthreshold = None
368 opts = self.opener.options
368 opts = self.opener.options
369
369
370 if b'revlogv2' in opts:
370 if b'revlogv2' in opts:
371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
372 elif b'revlogv1' in opts:
372 elif b'revlogv1' in opts:
373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
374 if b'generaldelta' in opts:
374 if b'generaldelta' in opts:
375 newversionflags |= FLAG_GENERALDELTA
375 newversionflags |= FLAG_GENERALDELTA
376 elif b'revlogv0' in self.opener.options:
376 elif b'revlogv0' in self.opener.options:
377 newversionflags = REVLOGV0
377 newversionflags = REVLOGV0
378 else:
378 else:
379 newversionflags = REVLOG_DEFAULT_VERSION
379 newversionflags = REVLOG_DEFAULT_VERSION
380
380
381 if b'chunkcachesize' in opts:
381 if b'chunkcachesize' in opts:
382 self._chunkcachesize = opts[b'chunkcachesize']
382 self._chunkcachesize = opts[b'chunkcachesize']
383 if b'maxchainlen' in opts:
383 if b'maxchainlen' in opts:
384 self._maxchainlen = opts[b'maxchainlen']
384 self._maxchainlen = opts[b'maxchainlen']
385 if b'deltabothparents' in opts:
385 if b'deltabothparents' in opts:
386 self._deltabothparents = opts[b'deltabothparents']
386 self._deltabothparents = opts[b'deltabothparents']
387 self._lazydelta = bool(opts.get(b'lazydelta', True))
387 self._lazydelta = bool(opts.get(b'lazydelta', True))
388 self._lazydeltabase = False
388 self._lazydeltabase = False
389 if self._lazydelta:
389 if self._lazydelta:
390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
391 if b'compengine' in opts:
391 if b'compengine' in opts:
392 self._compengine = opts[b'compengine']
392 self._compengine = opts[b'compengine']
393 if b'zlib.level' in opts:
393 if b'zlib.level' in opts:
394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
395 if b'zstd.level' in opts:
395 if b'zstd.level' in opts:
396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
397 if b'maxdeltachainspan' in opts:
397 if b'maxdeltachainspan' in opts:
398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
400 mmapindexthreshold = opts[b'mmapindexthreshold']
400 mmapindexthreshold = opts[b'mmapindexthreshold']
401 self.hassidedata = bool(opts.get(b'side-data', False))
401 self.hassidedata = bool(opts.get(b'side-data', False))
402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
403 withsparseread = bool(opts.get(b'with-sparse-read', False))
403 withsparseread = bool(opts.get(b'with-sparse-read', False))
404 # sparse-revlog forces sparse-read
404 # sparse-revlog forces sparse-read
405 self._withsparseread = self._sparserevlog or withsparseread
405 self._withsparseread = self._sparserevlog or withsparseread
406 if b'sparse-read-density-threshold' in opts:
406 if b'sparse-read-density-threshold' in opts:
407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
408 if b'sparse-read-min-gap-size' in opts:
408 if b'sparse-read-min-gap-size' in opts:
409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
410 if opts.get(b'enableellipsis'):
410 if opts.get(b'enableellipsis'):
411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412
412
413 # revlog v0 doesn't have flag processors
413 # revlog v0 doesn't have flag processors
414 for flag, processor in pycompat.iteritems(
414 for flag, processor in pycompat.iteritems(
415 opts.get(b'flagprocessors', {})
415 opts.get(b'flagprocessors', {})
416 ):
416 ):
417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
418
418
419 if self._chunkcachesize <= 0:
419 if self._chunkcachesize <= 0:
420 raise error.RevlogError(
420 raise error.RevlogError(
421 _(b'revlog chunk cache size %r is not greater than 0')
421 _(b'revlog chunk cache size %r is not greater than 0')
422 % self._chunkcachesize
422 % self._chunkcachesize
423 )
423 )
424 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 raise error.RevlogError(
425 raise error.RevlogError(
426 _(b'revlog chunk cache size %r is not a power of 2')
426 _(b'revlog chunk cache size %r is not a power of 2')
427 % self._chunkcachesize
427 % self._chunkcachesize
428 )
428 )
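# (`x & (x - 1)` clears the lowest set bit, so it is zero exactly for
# powers of two: 65536 & 65535 == 0, while 65537 & 65536 != 0.)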
429
429
430 indexdata = b''
430 indexdata = b''
431 self._initempty = True
431 self._initempty = True
432 try:
432 try:
433 with self._indexfp() as f:
433 with self._indexfp() as f:
434 if (
434 if (
435 mmapindexthreshold is not None
435 mmapindexthreshold is not None
436 and self.opener.fstat(f).st_size >= mmapindexthreshold
436 and self.opener.fstat(f).st_size >= mmapindexthreshold
437 ):
437 ):
438 # TODO: should .close() to release resources without
438 # TODO: should .close() to release resources without
439 # relying on Python GC
439 # relying on Python GC
440 indexdata = util.buffer(util.mmapread(f))
440 indexdata = util.buffer(util.mmapread(f))
441 else:
441 else:
442 indexdata = f.read()
442 indexdata = f.read()
443 if len(indexdata) > 0:
443 if len(indexdata) > 0:
444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
445 self._initempty = False
445 self._initempty = False
446 else:
446 else:
447 versionflags = newversionflags
447 versionflags = newversionflags
448 except IOError as inst:
448 except IOError as inst:
449 if inst.errno != errno.ENOENT:
449 if inst.errno != errno.ENOENT:
450 raise
450 raise
451
451
452 versionflags = newversionflags
452 versionflags = newversionflags
453
453
454 self.version = versionflags
454 self.version = versionflags
455
455
456 flags = versionflags & ~0xFFFF
456 flags = versionflags & ~0xFFFF
457 fmt = versionflags & 0xFFFF
457 fmt = versionflags & 0xFFFF
458
458
459 if fmt == REVLOGV0:
459 if fmt == REVLOGV0:
460 if flags:
460 if flags:
461 raise error.RevlogError(
461 raise error.RevlogError(
462 _(b'unknown flags (%#04x) in version %d revlog %s')
462 _(b'unknown flags (%#04x) in version %d revlog %s')
463 % (flags >> 16, fmt, self.indexfile)
463 % (flags >> 16, fmt, self.indexfile)
464 )
464 )
465
465
466 self._inline = False
466 self._inline = False
467 self._generaldelta = False
467 self._generaldelta = False
468
468
469 elif fmt == REVLOGV1:
469 elif fmt == REVLOGV1:
470 if flags & ~REVLOGV1_FLAGS:
470 if flags & ~REVLOGV1_FLAGS:
471 raise error.RevlogError(
471 raise error.RevlogError(
472 _(b'unknown flags (%#04x) in version %d revlog %s')
472 _(b'unknown flags (%#04x) in version %d revlog %s')
473 % (flags >> 16, fmt, self.indexfile)
473 % (flags >> 16, fmt, self.indexfile)
474 )
474 )
475
475
476 self._inline = versionflags & FLAG_INLINE_DATA
476 self._inline = versionflags & FLAG_INLINE_DATA
477 self._generaldelta = versionflags & FLAG_GENERALDELTA
477 self._generaldelta = versionflags & FLAG_GENERALDELTA
478
478
479 elif fmt == REVLOGV2:
479 elif fmt == REVLOGV2:
480 if flags & ~REVLOGV2_FLAGS:
480 if flags & ~REVLOGV2_FLAGS:
481 raise error.RevlogError(
481 raise error.RevlogError(
482 _(b'unknown flags (%#04x) in version %d revlog %s')
482 _(b'unknown flags (%#04x) in version %d revlog %s')
483 % (flags >> 16, fmt, self.indexfile)
483 % (flags >> 16, fmt, self.indexfile)
484 )
484 )
485
485
486 # There is a bug in the transaction handling when going from an
486 # There is a bug in the transaction handling when going from an
487 # inline revlog to a separate index and data file. Turn it off until
487 # inline revlog to a separate index and data file. Turn it off until
488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
489 # See issue6485
489 # See issue6485
490 self._inline = False
490 self._inline = False
491 # generaldelta implied by version 2 revlogs.
491 # generaldelta implied by version 2 revlogs.
492 self._generaldelta = True
492 self._generaldelta = True
493
493
494 else:
494 else:
495 raise error.RevlogError(
495 raise error.RevlogError(
496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
497 )
497 )
498
498
499 self.nodeconstants = sha1nodeconstants
499 self.nodeconstants = sha1nodeconstants
500 self.nullid = self.nodeconstants.nullid
500 self.nullid = self.nodeconstants.nullid
501
501
502 # sparse-revlog can't be on without general-delta (issue6056)
502 # sparse-revlog can't be on without general-delta (issue6056)
503 if not self._generaldelta:
503 if not self._generaldelta:
504 self._sparserevlog = False
504 self._sparserevlog = False
505
505
506 self._storedeltachains = True
506 self._storedeltachains = True
507
507
508 devel_nodemap = (
508 devel_nodemap = (
509 self.nodemap_file
509 self.nodemap_file
510 and opts.get(b'devel-force-nodemap', False)
510 and opts.get(b'devel-force-nodemap', False)
511 and parse_index_v1_nodemap is not None
511 and parse_index_v1_nodemap is not None
512 )
512 )
513
513
514 use_rust_index = False
514 use_rust_index = False
515 if rustrevlog is not None:
515 if rustrevlog is not None:
516 if self.nodemap_file is not None:
516 if self.nodemap_file is not None:
517 use_rust_index = True
517 use_rust_index = True
518 else:
518 else:
519 use_rust_index = self.opener.options.get(b'rust.index')
519 use_rust_index = self.opener.options.get(b'rust.index')
520
520
521 self._parse_index = parse_index_v1
521 self._parse_index = parse_index_v1
522 if self.version == REVLOGV0:
522 if self.version == REVLOGV0:
523 self._parse_index = revlogv0.parse_index_v0
523 self._parse_index = revlogv0.parse_index_v0
524 elif fmt == REVLOGV2:
524 elif fmt == REVLOGV2:
525 self._parse_index = parse_index_v2
525 self._parse_index = parse_index_v2
526 elif devel_nodemap:
526 elif devel_nodemap:
527 self._parse_index = parse_index_v1_nodemap
527 self._parse_index = parse_index_v1_nodemap
528 elif use_rust_index:
528 elif use_rust_index:
529 self._parse_index = parse_index_v1_mixed
529 self._parse_index = parse_index_v1_mixed
530 try:
530 try:
531 d = self._parse_index(indexdata, self._inline)
531 d = self._parse_index(indexdata, self._inline)
532 index, _chunkcache = d
532 index, _chunkcache = d
533 use_nodemap = (
533 use_nodemap = (
534 not self._inline
534 not self._inline
535 and self.nodemap_file is not None
535 and self.nodemap_file is not None
536 and util.safehasattr(index, 'update_nodemap_data')
536 and util.safehasattr(index, 'update_nodemap_data')
537 )
537 )
538 if use_nodemap:
538 if use_nodemap:
539 nodemap_data = nodemaputil.persisted_data(self)
539 nodemap_data = nodemaputil.persisted_data(self)
540 if nodemap_data is not None:
540 if nodemap_data is not None:
541 docket = nodemap_data[0]
541 docket = nodemap_data[0]
542 if (
542 if (
543 len(d[0]) > docket.tip_rev
543 len(d[0]) > docket.tip_rev
544 and d[0][docket.tip_rev][7] == docket.tip_node
544 and d[0][docket.tip_rev][7] == docket.tip_node
545 ):
545 ):
546 # no changelog tampering
546 # no changelog tampering
547 self._nodemap_docket = docket
547 self._nodemap_docket = docket
548 index.update_nodemap_data(*nodemap_data)
548 index.update_nodemap_data(*nodemap_data)
549 except (ValueError, IndexError):
549 except (ValueError, IndexError):
550 raise error.RevlogError(
550 raise error.RevlogError(
551 _(b"index %s is corrupted") % self.indexfile
551 _(b"index %s is corrupted") % self.indexfile
552 )
552 )
553 self.index, self._chunkcache = d
553 self.index, self._chunkcache = d
554 if not self._chunkcache:
554 if not self._chunkcache:
555 self._chunkclear()
555 self._chunkclear()
556 # revnum -> (chain-length, sum-delta-length)
556 # revnum -> (chain-length, sum-delta-length)
557 self._chaininfocache = util.lrucachedict(500)
557 self._chaininfocache = util.lrucachedict(500)
558 # revlog header -> revlog compressor
558 # revlog header -> revlog compressor
559 self._decompressors = {}
559 self._decompressors = {}
560
560
561 @util.propertycache
561 @util.propertycache
562 def revlog_kind(self):
562 def revlog_kind(self):
563 return self.target[0]
563 return self.target[0]
564
564
565 @util.propertycache
565 @util.propertycache
566 def _compressor(self):
566 def _compressor(self):
567 engine = util.compengines[self._compengine]
567 engine = util.compengines[self._compengine]
568 return engine.revlogcompressor(self._compengineopts)
568 return engine.revlogcompressor(self._compengineopts)
569
569
570 def _indexfp(self, mode=b'r'):
570 def _indexfp(self, mode=b'r'):
571 """file object for the revlog's index file"""
571 """file object for the revlog's index file"""
572 args = {'mode': mode}
572 args = {'mode': mode}
573 if mode != b'r':
573 if mode != b'r':
574 args['checkambig'] = self._checkambig
574 args['checkambig'] = self._checkambig
575 if mode == b'w':
575 if mode == b'w':
576 args['atomictemp'] = True
576 args['atomictemp'] = True
577 return self.opener(self.indexfile, **args)
577 return self.opener(self.indexfile, **args)
578
578
579 def _datafp(self, mode=b'r'):
579 def _datafp(self, mode=b'r'):
580 """file object for the revlog's data file"""
580 """file object for the revlog's data file"""
581 return self.opener(self.datafile, mode=mode)
581 return self.opener(self.datafile, mode=mode)
582
582
583 @contextlib.contextmanager
583 @contextlib.contextmanager
584 def _datareadfp(self, existingfp=None):
584 def _datareadfp(self, existingfp=None):
585 """file object suitable to read data"""
585 """file object suitable to read data"""
586 # Use explicit file handle, if given.
586 # Use explicit file handle, if given.
587 if existingfp is not None:
587 if existingfp is not None:
588 yield existingfp
588 yield existingfp
589
589
590 # Use a file handle being actively used for writes, if available.
590 # Use a file handle being actively used for writes, if available.
591 # There is some danger in doing this because reads will seek the
591 # There is some danger in doing this because reads will seek the
592 # file. However, _writeentry() performs a SEEK_END before all writes,
592 # file. However, _writeentry() performs a SEEK_END before all writes,
593 # so we should be safe.
593 # so we should be safe.
594 elif self._writinghandles:
594 elif self._writinghandles:
595 if self._inline:
595 if self._inline:
596 yield self._writinghandles[0]
596 yield self._writinghandles[0]
597 else:
597 else:
598 yield self._writinghandles[1]
598 yield self._writinghandles[1]
599
599
600 # Otherwise open a new file handle.
600 # Otherwise open a new file handle.
601 else:
601 else:
602 if self._inline:
602 if self._inline:
603 func = self._indexfp
603 func = self._indexfp
604 else:
604 else:
605 func = self._datafp
605 func = self._datafp
606 with func() as fp:
606 with func() as fp:
607 yield fp
607 yield fp
608
608
609 def tiprev(self):
609 def tiprev(self):
610 return len(self.index) - 1
610 return len(self.index) - 1
611
611
612 def tip(self):
612 def tip(self):
613 return self.node(self.tiprev())
613 return self.node(self.tiprev())
614
614
615 def __contains__(self, rev):
615 def __contains__(self, rev):
616 return 0 <= rev < len(self)
616 return 0 <= rev < len(self)
617
617
618 def __len__(self):
618 def __len__(self):
619 return len(self.index)
619 return len(self.index)
620
620
621 def __iter__(self):
621 def __iter__(self):
622 return iter(pycompat.xrange(len(self)))
622 return iter(pycompat.xrange(len(self)))
623
623
624 def revs(self, start=0, stop=None):
624 def revs(self, start=0, stop=None):
625 """iterate over all rev in this revlog (from start to stop)"""
625 """iterate over all rev in this revlog (from start to stop)"""
626 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 return storageutil.iterrevs(len(self), start=start, stop=stop)
627
627
628 @property
628 @property
629 def nodemap(self):
629 def nodemap(self):
630 msg = (
630 msg = (
631 b"revlog.nodemap is deprecated, "
631 b"revlog.nodemap is deprecated, "
632 b"use revlog.index.[has_node|rev|get_rev]"
632 b"use revlog.index.[has_node|rev|get_rev]"
633 )
633 )
634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
635 return self.index.nodemap
635 return self.index.nodemap
636
636
637 @property
637 @property
638 def _nodecache(self):
638 def _nodecache(self):
639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
641 return self.index.nodemap
641 return self.index.nodemap
642
642
643 def hasnode(self, node):
643 def hasnode(self, node):
644 try:
644 try:
645 self.rev(node)
645 self.rev(node)
646 return True
646 return True
647 except KeyError:
647 except KeyError:
648 return False
648 return False
649
649
650 def candelta(self, baserev, rev):
650 def candelta(self, baserev, rev):
651 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 """whether two revisions (baserev, rev) can be delta-ed or not"""
652 # Disable delta if either rev requires a content-changing flag
652 # Disable delta if either rev requires a content-changing flag
653 # processor (ex. LFS). This is because such flag processor can alter
653 # processor (ex. LFS). This is because such flag processor can alter
654 # the rawtext content that the delta will be based on, and two clients
654 # the rawtext content that the delta will be based on, and two clients
655 # could have a same revlog node with different flags (i.e. different
655 # could have a same revlog node with different flags (i.e. different
656 # rawtext contents) and the delta could be incompatible.
656 # rawtext contents) and the delta could be incompatible.
657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
659 ):
659 ):
660 return False
660 return False
661 return True
661 return True
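# (Concrete case: LFS stores some rawtexts externally under
# REVIDX_EXTSTORED, so two clients can hold different rawtexts for the
# same node, and a delta computed on one side may not apply on the other.)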
662
662
663 def update_caches(self, transaction):
663 def update_caches(self, transaction):
664 if self.nodemap_file is not None:
664 if self.nodemap_file is not None:
665 if transaction is None:
665 if transaction is None:
666 nodemaputil.update_persistent_nodemap(self)
666 nodemaputil.update_persistent_nodemap(self)
667 else:
667 else:
668 nodemaputil.setup_persistent_nodemap(transaction, self)
668 nodemaputil.setup_persistent_nodemap(transaction, self)
669
669
670 def clearcaches(self):
670 def clearcaches(self):
671 self._revisioncache = None
671 self._revisioncache = None
672 self._chainbasecache.clear()
672 self._chainbasecache.clear()
673 self._chunkcache = (0, b'')
673 self._chunkcache = (0, b'')
674 self._pcache = {}
674 self._pcache = {}
675 self._nodemap_docket = None
675 self._nodemap_docket = None
676 self.index.clearcaches()
676 self.index.clearcaches()
677 # The python code is the one responsible for validating the docket, so we
677 # The python code is the one responsible for validating the docket, so we
678 # end up having to refresh it here.
678 # end up having to refresh it here.
679 use_nodemap = (
679 use_nodemap = (
680 not self._inline
680 not self._inline
681 and self.nodemap_file is not None
681 and self.nodemap_file is not None
682 and util.safehasattr(self.index, 'update_nodemap_data')
682 and util.safehasattr(self.index, 'update_nodemap_data')
683 )
683 )
684 if use_nodemap:
684 if use_nodemap:
685 nodemap_data = nodemaputil.persisted_data(self)
685 nodemap_data = nodemaputil.persisted_data(self)
686 if nodemap_data is not None:
686 if nodemap_data is not None:
687 self._nodemap_docket = nodemap_data[0]
687 self._nodemap_docket = nodemap_data[0]
688 self.index.update_nodemap_data(*nodemap_data)
688 self.index.update_nodemap_data(*nodemap_data)
689
689
690 def rev(self, node):
690 def rev(self, node):
691 try:
691 try:
692 return self.index.rev(node)
692 return self.index.rev(node)
693 except TypeError:
693 except TypeError:
694 raise
694 raise
695 except error.RevlogError:
695 except error.RevlogError:
696 # parsers.c radix tree lookup failed
696 # parsers.c radix tree lookup failed
697 if (
697 if (
698 node == self.nodeconstants.wdirid
698 node == self.nodeconstants.wdirid
699 or node in self.nodeconstants.wdirfilenodeids
699 or node in self.nodeconstants.wdirfilenodeids
700 ):
700 ):
701 raise error.WdirUnsupported
701 raise error.WdirUnsupported
702 raise error.LookupError(node, self.indexfile, _(b'no node'))
702 raise error.LookupError(node, self.indexfile, _(b'no node'))
703
703
704 # Accessors for index entries.
704 # Accessors for index entries.
705
705
706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
707 # are flags.
707 # are flags.
708 def start(self, rev):
708 def start(self, rev):
709 return int(self.index[rev][0] >> 16)
709 return int(self.index[rev][0] >> 16)
710
710
711 def flags(self, rev):
711 def flags(self, rev):
712 return self.index[rev][0] & 0xFFFF
712 return self.index[rev][0] & 0xFFFF
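# (These mirror offset_type() above: for a packed first entry value v,
# start() recovers v >> 16 and flags() recovers v & 0xFFFF.)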
713
713
714 def length(self, rev):
714 def length(self, rev):
715 return self.index[rev][1]
715 return self.index[rev][1]
716
716
717 def sidedata_length(self, rev):
717 def sidedata_length(self, rev):
718 if self.version & 0xFFFF != REVLOGV2:
718 if self.version & 0xFFFF != REVLOGV2:
719 return 0
719 return 0
720 return self.index[rev][9]
720 return self.index[rev][9]
721
721
722 def rawsize(self, rev):
722 def rawsize(self, rev):
723 """return the length of the uncompressed text for a given revision"""
723 """return the length of the uncompressed text for a given revision"""
724 l = self.index[rev][2]
724 l = self.index[rev][2]
725 if l >= 0:
725 if l >= 0:
726 return l
726 return l
727
727
728 t = self.rawdata(rev)
728 t = self.rawdata(rev)
729 return len(t)
729 return len(t)
730
730
731 def size(self, rev):
731 def size(self, rev):
732 """length of non-raw text (processed by a "read" flag processor)"""
732 """length of non-raw text (processed by a "read" flag processor)"""
733 # fast path: if no "read" flag processor could change the content,
733 # fast path: if no "read" flag processor could change the content,
734 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 # size is rawsize. note: ELLIPSIS is known to not change the content.
735 flags = self.flags(rev)
735 flags = self.flags(rev)
736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
737 return self.rawsize(rev)
737 return self.rawsize(rev)
738
738
739 return len(self.revision(rev, raw=False))
739 return len(self.revision(rev, raw=False))
740
740
741 def chainbase(self, rev):
741 def chainbase(self, rev):
742 base = self._chainbasecache.get(rev)
742 base = self._chainbasecache.get(rev)
743 if base is not None:
743 if base is not None:
744 return base
744 return base
745
745
746 index = self.index
746 index = self.index
747 iterrev = rev
747 iterrev = rev
748 base = index[iterrev][3]
748 base = index[iterrev][3]
749 while base != iterrev:
749 while base != iterrev:
750 iterrev = base
750 iterrev = base
751 base = index[iterrev][3]
751 base = index[iterrev][3]
752
752
753 self._chainbasecache[rev] = base
753 self._chainbasecache[rev] = base
754 return base
754 return base
755
755
756 def linkrev(self, rev):
756 def linkrev(self, rev):
757 return self.index[rev][4]
757 return self.index[rev][4]
758
758
759 def parentrevs(self, rev):
759 def parentrevs(self, rev):
760 try:
760 try:
761 entry = self.index[rev]
761 entry = self.index[rev]
762 except IndexError:
762 except IndexError:
763 if rev == wdirrev:
763 if rev == wdirrev:
764 raise error.WdirUnsupported
764 raise error.WdirUnsupported
765 raise
765 raise
766 if entry[5] == nullrev:
766 if entry[5] == nullrev:
767 return entry[6], entry[5]
767 return entry[6], entry[5]
768 else:
768 else:
769 return entry[5], entry[6]
769 return entry[5], entry[6]
770
770
771 # fast parentrevs(rev) where rev isn't filtered
771 # fast parentrevs(rev) where rev isn't filtered
772 _uncheckedparentrevs = parentrevs
772 _uncheckedparentrevs = parentrevs
773
773
774 def node(self, rev):
774 def node(self, rev):
775 try:
775 try:
776 return self.index[rev][7]
776 return self.index[rev][7]
777 except IndexError:
777 except IndexError:
778 if rev == wdirrev:
778 if rev == wdirrev:
779 raise error.WdirUnsupported
779 raise error.WdirUnsupported
780 raise
780 raise
781
781
782 # Derived from index values.
782 # Derived from index values.
783
783
784 def end(self, rev):
784 def end(self, rev):
785 return self.start(rev) + self.length(rev)
785 return self.start(rev) + self.length(rev)
786
786
787 def parents(self, node):
787 def parents(self, node):
788 i = self.index
788 i = self.index
789 d = i[self.rev(node)]
789 d = i[self.rev(node)]
790 # inline node() to avoid function call overhead
790 # inline node() to avoid function call overhead
791 if d[5] == self.nullid:
791 if d[5] == self.nullid:
792 return i[d[6]][7], i[d[5]][7]
792 return i[d[6]][7], i[d[5]][7]
793 else:
793 else:
794 return i[d[5]][7], i[d[6]][7]
794 return i[d[5]][7], i[d[6]][7]
795
795
796 def chainlen(self, rev):
796 def chainlen(self, rev):
797 return self._chaininfo(rev)[0]
797 return self._chaininfo(rev)[0]
798
798
799 def _chaininfo(self, rev):
799 def _chaininfo(self, rev):
800 chaininfocache = self._chaininfocache
800 chaininfocache = self._chaininfocache
801 if rev in chaininfocache:
801 if rev in chaininfocache:
802 return chaininfocache[rev]
802 return chaininfocache[rev]
803 index = self.index
803 index = self.index
804 generaldelta = self._generaldelta
804 generaldelta = self._generaldelta
805 iterrev = rev
805 iterrev = rev
806 e = index[iterrev]
806 e = index[iterrev]
807 clen = 0
807 clen = 0
808 compresseddeltalen = 0
808 compresseddeltalen = 0
809 while iterrev != e[3]:
809 while iterrev != e[3]:
810 clen += 1
810 clen += 1
811 compresseddeltalen += e[1]
811 compresseddeltalen += e[1]
812 if generaldelta:
812 if generaldelta:
813 iterrev = e[3]
813 iterrev = e[3]
814 else:
814 else:
815 iterrev -= 1
815 iterrev -= 1
816 if iterrev in chaininfocache:
816 if iterrev in chaininfocache:
817 t = chaininfocache[iterrev]
817 t = chaininfocache[iterrev]
818 clen += t[0]
818 clen += t[0]
819 compresseddeltalen += t[1]
819 compresseddeltalen += t[1]
820 break
820 break
821 e = index[iterrev]
821 e = index[iterrev]
822 else:
822 else:
823 # Add text length of base since decompressing that also takes
823 # Add text length of base since decompressing that also takes
824 # work. For cache hits the length is already included.
824 # work. For cache hits the length is already included.
825 compresseddeltalen += e[1]
825 compresseddeltalen += e[1]
826 r = (clen, compresseddeltalen)
826 r = (clen, compresseddeltalen)
827 chaininfocache[rev] = r
827 chaininfocache[rev] = r
828 return r
828 return r
829
829
830 def _deltachain(self, rev, stoprev=None):
830 def _deltachain(self, rev, stoprev=None):
831 """Obtain the delta chain for a revision.
831 """Obtain the delta chain for a revision.
832
832
833 ``stoprev`` specifies a revision to stop at. If not specified, we
833 ``stoprev`` specifies a revision to stop at. If not specified, we
834 stop at the base of the chain.
834 stop at the base of the chain.
835
835
836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
837 revs in ascending order and ``stopped`` is a bool indicating whether
837 revs in ascending order and ``stopped`` is a bool indicating whether
838 ``stoprev`` was hit.
838 ``stoprev`` was hit.
839 """
839 """
840 # Try C implementation.
840 # Try C implementation.
841 try:
841 try:
842 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 return self.index.deltachain(rev, stoprev, self._generaldelta)
843 except AttributeError:
843 except AttributeError:
844 pass
844 pass
845
845
846 chain = []
846 chain = []
847
847
848 # Alias to prevent attribute lookup in tight loop.
848 # Alias to prevent attribute lookup in tight loop.
849 index = self.index
849 index = self.index
850 generaldelta = self._generaldelta
850 generaldelta = self._generaldelta
851
851
852 iterrev = rev
852 iterrev = rev
853 e = index[iterrev]
853 e = index[iterrev]
854 while iterrev != e[3] and iterrev != stoprev:
854 while iterrev != e[3] and iterrev != stoprev:
855 chain.append(iterrev)
855 chain.append(iterrev)
856 if generaldelta:
856 if generaldelta:
857 iterrev = e[3]
857 iterrev = e[3]
858 else:
858 else:
859 iterrev -= 1
859 iterrev -= 1
860 e = index[iterrev]
860 e = index[iterrev]
861
861
862 if iterrev == stoprev:
862 if iterrev == stoprev:
863 stopped = True
863 stopped = True
864 else:
864 else:
865 chain.append(iterrev)
865 chain.append(iterrev)
866 stopped = False
866 stopped = False
867
867
868 chain.reverse()
868 chain.reverse()
869 return chain, stopped
869 return chain, stopped
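# Illustrative use (assuming `rl` is a revlog instance): collect the full
# chain needed to rebuild the tip revision.
#
# chain, stopped = rl._deltachain(rl.tiprev())
# assert not stopped  # no stoprev was given, so we walked to the base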
870
870
871 def ancestors(self, revs, stoprev=0, inclusive=False):
871 def ancestors(self, revs, stoprev=0, inclusive=False):
872 """Generate the ancestors of 'revs' in reverse revision order.
872 """Generate the ancestors of 'revs' in reverse revision order.
873 Does not generate revs lower than stoprev.
873 Does not generate revs lower than stoprev.
874
874
875 See the documentation for ancestor.lazyancestors for more details."""
875 See the documentation for ancestor.lazyancestors for more details."""
876
876
877 # first, make sure start revisions aren't filtered
877 # first, make sure start revisions aren't filtered
878 revs = list(revs)
878 revs = list(revs)
879 checkrev = self.node
879 checkrev = self.node
880 for r in revs:
880 for r in revs:
881 checkrev(r)
881 checkrev(r)
882 # and we're sure ancestors aren't filtered as well
882 # and we're sure ancestors aren't filtered as well
883
883
884 if rustancestor is not None:
884 if rustancestor is not None:
885 lazyancestors = rustancestor.LazyAncestors
885 lazyancestors = rustancestor.LazyAncestors
886 arg = self.index
886 arg = self.index
887 else:
887 else:
888 lazyancestors = ancestor.lazyancestors
888 lazyancestors = ancestor.lazyancestors
889 arg = self._uncheckedparentrevs
889 arg = self._uncheckedparentrevs
890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
891
891
892 def descendants(self, revs):
892 def descendants(self, revs):
893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
894
894
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

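    # Hedged example of the return contract above: `has` supports lazy
    # membership tests, `missing` is a topologically sorted list of nodes.
    # `rl`, `c` and `h` are assumed names for an open revlog and two of its
    # node IDs.
    #
    #   has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #   # every rev in `has` is an ancestor of c; every node in `missing`
    #   # is reachable from h but not from c
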
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

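    # Sketch of typical use (assumed, not a verbatim excerpt): the returned
    # object is queried with head revisions and reports the revs that are
    # not ancestors of `common`.
    #
    #   inc = rl.incrementalmissingrevs(common=[c])
    #   missing = inc.missingancestors([h1, h2])
    #   # ancestors of h1/h2 that are not ancestors of c
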
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

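    # Node-based twin of findmissingrevs; a minimal sketch with assumed
    # names (`rl` open revlog, `c`/`h` known node IDs):
    #
    #   missing = rl.findmissing(common=[c], heads=[h])  # list of node IDs
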
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

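    # Minimal sketch of the three-part return value, under the assumption
    # that `root_node` and `head_node` are nodes known to `rl`:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([root_node], [head_node])
    #   # nodes: topologically sorted path; outroots/outheads: the subsets
    #   # of the inputs that are actually connected
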
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

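    # Quick reference, with assumed example values (`n` a node, `m` a node
    # whose descendants should be ignored):
    #
    #   rl.heads()          # all nodes without children
    #   rl.heads(start=n)   # only heads descending from node n
    #   rl.heads(stop=[m])  # treat revs from m as if they had no children
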
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

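    # The `a > b` early exit above is sound because revision numbers are
    # assigned in topological order: a parent always has a smaller rev than
    # any of its children. For example (revs assumed to exist):
    #
    #   rl.isancestorrev(7, 3)  # -> False without consulting the index
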
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

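    # The accepted identifier forms, as a hedged sketch (`node` assumed to
    # be a binary nodeid present in the revlog, b'1f0e' an assumed prefix):
    #
    #   rl.lookup(0)        # revision number
    #   rl.lookup(b'0')     # str(revision number)
    #   rl.lookup(node)     # full binary nodeid
    #   rl.lookup(b'1f0e')  # unambiguous hex prefix
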
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

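    # Sketch (the returned values shown are assumptions for illustration):
    #
    #   rl.shortest(node)               # e.g. b'1f0e'
    #   rl.shortest(node, minlength=6)  # e.g. b'1f0e3a'
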
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

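    # Worked example of the windowing arithmetic above, assuming the
    # default chunk cache size of 65536: for offset=70000, length=100,
    #
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # i.e. a single aligned 64K window that contains the requested span.
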
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

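    # Hedged sketch: one contiguous read replaces three separate
    # ``_chunk()`` calls (the revision numbers are assumed examples):
    #
    #   texts = rl._chunks([4, 5, 6])  # three decompressed deltas
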
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

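    # Behavior sketch for an assumed rev 5: with generaldelta the stored
    # base is returned directly; in the legacy layout the delta is always
    # against the previous revision; a full snapshot reports nullrev.
    #
    #   rl.deltaparent(5)  # -> stored base, or 4 (legacy), or nullrev
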
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

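    # Contract sketch, grounded in _revisiondata below: sidedata is only
    # populated for REVLOGV2 revlogs; otherwise an empty dict is returned.
    #
    #   rl.revision(node)  # fulltext with flag processors applied
    #   rl.sidedata(node)  # {} unless the revlog stores sidedata
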
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

1808 def _rawtext(self, node, rev, _df=None):
1808 def _rawtext(self, node, rev, _df=None):
1809 """return the possibly unvalidated rawtext for a revision
1809 """return the possibly unvalidated rawtext for a revision
1810
1810
1811 returns (rev, rawtext, validated)
1811 returns (rev, rawtext, validated)
1812 """
1812 """
1813
1813
1814 # revision in the cache (could be useful to apply delta)
1814 # revision in the cache (could be useful to apply delta)
1815 cachedrev = None
1815 cachedrev = None
1816 # An intermediate text to apply deltas to
1816 # An intermediate text to apply deltas to
1817 basetext = None
1817 basetext = None
1818
1818
1819 # Check if we have the entry in cache
1819 # Check if we have the entry in cache
1820 # The cache entry looks like (node, rev, rawtext)
1820 # The cache entry looks like (node, rev, rawtext)
1821 if self._revisioncache:
1821 if self._revisioncache:
1822 if self._revisioncache[0] == node:
1822 if self._revisioncache[0] == node:
1823 return (rev, self._revisioncache[2], True)
1823 return (rev, self._revisioncache[2], True)
1824 cachedrev = self._revisioncache[1]
1824 cachedrev = self._revisioncache[1]
1825
1825
1826 if rev is None:
1826 if rev is None:
1827 rev = self.rev(node)
1827 rev = self.rev(node)
1828
1828
1829 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1829 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1830 if stopped:
1830 if stopped:
1831 basetext = self._revisioncache[2]
1831 basetext = self._revisioncache[2]
1832
1832
1833 # drop cache to save memory, the caller is expected to
1833 # drop cache to save memory, the caller is expected to
1834 # update self._revisioncache after validating the text
1834 # update self._revisioncache after validating the text
1835 self._revisioncache = None
1835 self._revisioncache = None
1836
1836
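        # (editor's note, an assumption from reading the call below: the
        # target size acts as a read-ahead hint for the chunk layer; the
        # full text is ``rawsize`` bytes, so buffering more than a small
        # multiple of that is unlikely to pay off.)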
        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

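        # An inline revlog interleaves data with index entries, while the
        # offsets stored in the index pretend the data lives in a separate
        # file; skip the 1 + rev index entries written before this
        # revision's data to get the physical position.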
        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self.index.pack_header(self.version)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

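        # When flags are set, the flag processors below may rewrite ``text``
        # into a different stored form, while the node must hash the logical
        # text; pin the node down before any processing happens.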
        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

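        # No compression win: store the chunk literally. A chunk whose first
        # byte is NUL can be stored as-is, because decompress() below treats
        # a leading b'\0' as raw data; anything else gets the b'u'
        # (uncompressed) marker prepended.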
        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.version & 0xFFFF == REVLOGV2:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

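        # The new index entry, in field order: packed offset + flags, length
        # of the stored (possibly compressed) delta, full text length, delta
        # base rev, linkrev, parent revs, node, then the two sidedata fields,
        # which are only kept for revlog v2 (see the truncation below).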
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self.index.pack_header(self.version)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self.index.entry_size
        else:
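            # inline revlog: index entries and data share one file, so the
            # truncation point must also span the ``rev`` index entries
            # stored before that revision's data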
            end += rev * self.index.entry_size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

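        # Without general delta, deltas can only be stored against the
        # previous revision, so walking in storage order is what keeps those
        # deltas reusable.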
        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
2707 if deltareuse not in self.DELTAREUSEALL:
2708 if deltareuse not in self.DELTAREUSEALL:
2708 raise ValueError(
2709 raise ValueError(
2709 _(b'value for deltareuse invalid: %s') % deltareuse
2710 _(b'value for deltareuse invalid: %s') % deltareuse
2710 )
2711 )
2711
2712
2712 if len(destrevlog):
2713 if len(destrevlog):
2713 raise ValueError(_(b'destination revlog is not empty'))
2714 raise ValueError(_(b'destination revlog is not empty'))
2714
2715
2715 if getattr(self, 'filteredrevs', None):
2716 if getattr(self, 'filteredrevs', None):
2716 raise ValueError(_(b'source revlog has filtered revisions'))
2717 raise ValueError(_(b'source revlog has filtered revisions'))
2717 if getattr(destrevlog, 'filteredrevs', None):
2718 if getattr(destrevlog, 'filteredrevs', None):
2718 raise ValueError(_(b'destination revlog has filtered revisions'))
2719 raise ValueError(_(b'destination revlog has filtered revisions'))
2719
2720
2720 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2721 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2721 # if possible.
2722 # if possible.
2722 oldlazydelta = destrevlog._lazydelta
2723 oldlazydelta = destrevlog._lazydelta
2723 oldlazydeltabase = destrevlog._lazydeltabase
2724 oldlazydeltabase = destrevlog._lazydeltabase
2724 oldamd = destrevlog._deltabothparents
2725 oldamd = destrevlog._deltabothparents
2725
2726
2726 try:
2727 try:
2727 if deltareuse == self.DELTAREUSEALWAYS:
2728 if deltareuse == self.DELTAREUSEALWAYS:
2728 destrevlog._lazydeltabase = True
2729 destrevlog._lazydeltabase = True
2729 destrevlog._lazydelta = True
2730 destrevlog._lazydelta = True
2730 elif deltareuse == self.DELTAREUSESAMEREVS:
2731 elif deltareuse == self.DELTAREUSESAMEREVS:
2731 destrevlog._lazydeltabase = False
2732 destrevlog._lazydeltabase = False
2732 destrevlog._lazydelta = True
2733 destrevlog._lazydelta = True
2733 elif deltareuse == self.DELTAREUSENEVER:
2734 elif deltareuse == self.DELTAREUSENEVER:
2734 destrevlog._lazydeltabase = False
2735 destrevlog._lazydeltabase = False
2735 destrevlog._lazydelta = False
2736 destrevlog._lazydelta = False
2736
2737
2737 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2738 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2738
2739
2739 self._clone(
2740 self._clone(
2740 tr,
2741 tr,
2741 destrevlog,
2742 destrevlog,
2742 addrevisioncb,
2743 addrevisioncb,
2743 deltareuse,
2744 deltareuse,
2744 forcedeltabothparents,
2745 forcedeltabothparents,
2745 sidedata_helpers,
2746 sidedata_helpers,
2746 )
2747 )
2747
2748
2748 finally:
2749 finally:
2749 destrevlog._lazydelta = oldlazydelta
2750 destrevlog._lazydelta = oldlazydelta
2750 destrevlog._lazydeltabase = oldlazydeltabase
2751 destrevlog._lazydeltabase = oldlazydeltabase
2751 destrevlog._deltabothparents = oldamd
2752 destrevlog._deltabothparents = oldamd
2752
2753
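A usage sketch of the delta reuse policies documented above, assuming `tr` is an open transaction, `srcrl` the source revlog, and `destrl` an empty destination revlog (the keyword name follows the `deltareuse` parameter validated at the top of `clone`; the exact signature is assumed):

    # Recompute every delta, e.g. after the diff/delta algorithm changed
    # (DELTAREUSENEVER, the slowest but most thorough mode):
    srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)

    # The default policy, reusing a delta whenever the destination would
    # pick the same base revision:
    srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSESAMEREVS)
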
2753 def _clone(
2754 def _clone(
2754 self,
2755 self,
2755 tr,
2756 tr,
2756 destrevlog,
2757 destrevlog,
2757 addrevisioncb,
2758 addrevisioncb,
2758 deltareuse,
2759 deltareuse,
2759 forcedeltabothparents,
2760 forcedeltabothparents,
2760 sidedata_helpers,
2761 sidedata_helpers,
2761 ):
2762 ):
2762 """perform the core duty of `revlog.clone` after parameter processing"""
2763 """perform the core duty of `revlog.clone` after parameter processing"""
2763 deltacomputer = deltautil.deltacomputer(destrevlog)
2764 deltacomputer = deltautil.deltacomputer(destrevlog)
2764 index = self.index
2765 index = self.index
2765 for rev in self:
2766 for rev in self:
2766 entry = index[rev]
2767 entry = index[rev]
2767
2768
2768 # Some classes override linkrev to take filtered revs into
2769 # Some classes override linkrev to take filtered revs into
2769 # account. Use raw entry from index.
2770 # account. Use raw entry from index.
2770 flags = entry[0] & 0xFFFF
2771 flags = entry[0] & 0xFFFF
2771 linkrev = entry[4]
2772 linkrev = entry[4]
2772 p1 = index[entry[5]][7]
2773 p1 = index[entry[5]][7]
2773 p2 = index[entry[6]][7]
2774 p2 = index[entry[6]][7]
2774 node = entry[7]
2775 node = entry[7]
2775
2776
2776 # (Possibly) reuse the delta from the revlog if allowed and
2777 # (Possibly) reuse the delta from the revlog if allowed and
2777 # the revlog chunk is a delta.
2778 # the revlog chunk is a delta.
2778 cachedelta = None
2779 cachedelta = None
2779 rawtext = None
2780 rawtext = None
2780 if deltareuse == self.DELTAREUSEFULLADD:
2781 if deltareuse == self.DELTAREUSEFULLADD:
2781 text, sidedata = self._revisiondata(rev)
2782 text, sidedata = self._revisiondata(rev)
2782
2783
2783 if sidedata_helpers is not None:
2784 if sidedata_helpers is not None:
2784 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2785 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2785 self, sidedata_helpers, sidedata, rev
2786 self, sidedata_helpers, sidedata, rev
2786 )
2787 )
2787 flags = flags | new_flags[0] & ~new_flags[1]
2788 flags = flags | new_flags[0] & ~new_flags[1]
2788
2789
2789 destrevlog.addrevision(
2790 destrevlog.addrevision(
2790 text,
2791 text,
2791 tr,
2792 tr,
2792 linkrev,
2793 linkrev,
2793 p1,
2794 p1,
2794 p2,
2795 p2,
2795 cachedelta=cachedelta,
2796 cachedelta=cachedelta,
2796 node=node,
2797 node=node,
2797 flags=flags,
2798 flags=flags,
2798 deltacomputer=deltacomputer,
2799 deltacomputer=deltacomputer,
2799 sidedata=sidedata,
2800 sidedata=sidedata,
2800 )
2801 )
2801 else:
2802 else:
2802 if destrevlog._lazydelta:
2803 if destrevlog._lazydelta:
2803 dp = self.deltaparent(rev)
2804 dp = self.deltaparent(rev)
2804 if dp != nullrev:
2805 if dp != nullrev:
2805 cachedelta = (dp, bytes(self._chunk(rev)))
2806 cachedelta = (dp, bytes(self._chunk(rev)))
2806
2807
2807 sidedata = None
2808 sidedata = None
2808 if not cachedelta:
2809 if not cachedelta:
2809 rawtext, sidedata = self._revisiondata(rev)
2810 rawtext, sidedata = self._revisiondata(rev)
2810 if sidedata is None:
2811 if sidedata is None:
2811 sidedata = self.sidedata(rev)
2812 sidedata = self.sidedata(rev)
2812
2813
2813 if sidedata_helpers is not None:
2814 if sidedata_helpers is not None:
2814 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2815 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2815 self, sidedata_helpers, sidedata, rev
2816 self, sidedata_helpers, sidedata, rev
2816 )
2817 )
2817 flags = flags | new_flags[0] & ~new_flags[1]
2818 flags = flags | new_flags[0] & ~new_flags[1]
2818
2819
2819 ifh = destrevlog.opener(
2820 ifh = destrevlog.opener(
2820 destrevlog.indexfile, b'a+', checkambig=False
2821 destrevlog.indexfile, b'a+', checkambig=False
2821 )
2822 )
2822 dfh = None
2823 dfh = None
2823 if not destrevlog._inline:
2824 if not destrevlog._inline:
2824 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2825 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2825 try:
2826 try:
2826 destrevlog._addrevision(
2827 destrevlog._addrevision(
2827 node,
2828 node,
2828 rawtext,
2829 rawtext,
2829 tr,
2830 tr,
2830 linkrev,
2831 linkrev,
2831 p1,
2832 p1,
2832 p2,
2833 p2,
2833 flags,
2834 flags,
2834 cachedelta,
2835 cachedelta,
2835 ifh,
2836 ifh,
2836 dfh,
2837 dfh,
2837 deltacomputer=deltacomputer,
2838 deltacomputer=deltacomputer,
2838 sidedata=sidedata,
2839 sidedata=sidedata,
2839 )
2840 )
2840 finally:
2841 finally:
2841 if dfh:
2842 if dfh:
2842 dfh.close()
2843 dfh.close()
2843 ifh.close()
2844 ifh.close()
2844
2845
2845 if addrevisioncb:
2846 if addrevisioncb:
2846 addrevisioncb(self, rev, node)
2847 addrevisioncb(self, rev, node)
2847
2848
2848 def censorrevision(self, tr, censornode, tombstone=b''):
2849 def censorrevision(self, tr, censornode, tombstone=b''):
2849 if (self.version & 0xFFFF) == REVLOGV0:
2850 if (self.version & 0xFFFF) == REVLOGV0:
2850 raise error.RevlogError(
2851 raise error.RevlogError(
2851 _(b'cannot censor with version %d revlogs') % self.version
2852 _(b'cannot censor with version %d revlogs') % self.version
2852 )
2853 )
2853
2854
2854 censorrev = self.rev(censornode)
2855 censorrev = self.rev(censornode)
2855 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2856 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2856
2857
2857 if len(tombstone) > self.rawsize(censorrev):
2858 if len(tombstone) > self.rawsize(censorrev):
2858 raise error.Abort(
2859 raise error.Abort(
2859 _(b'censor tombstone must be no longer than censored data')
2860 _(b'censor tombstone must be no longer than censored data')
2860 )
2861 )
2861
2862
2862 # Rewriting the revlog in place is hard. Our strategy for censoring is
2863 # Rewriting the revlog in place is hard. Our strategy for censoring is
2863 # to create a new revlog, copy all revisions to it, then replace the
2864 # to create a new revlog, copy all revisions to it, then replace the
2864 # revlogs on transaction close.
2865 # revlogs on transaction close.
2865
2866
2866 newindexfile = self.indexfile + b'.tmpcensored'
2867 newindexfile = self.indexfile + b'.tmpcensored'
2867 newdatafile = self.datafile + b'.tmpcensored'
2868 newdatafile = self.datafile + b'.tmpcensored'
2868
2869
2869 # This is a bit dangerous. We could easily have a mismatch of state.
2870 # This is a bit dangerous. We could easily have a mismatch of state.
2870 newrl = revlog(
2871 newrl = revlog(
2871 self.opener,
2872 self.opener,
2872 target=self.target,
2873 target=self.target,
2873 indexfile=newindexfile,
2874 indexfile=newindexfile,
2874 datafile=newdatafile,
2875 datafile=newdatafile,
2875 censorable=True,
2876 censorable=True,
2876 )
2877 )
2877 newrl.version = self.version
2878 newrl.version = self.version
2878 newrl._generaldelta = self._generaldelta
2879 newrl._generaldelta = self._generaldelta
2879 newrl._parse_index = self._parse_index
2880 newrl._parse_index = self._parse_index
2880
2881
2881 for rev in self.revs():
2882 for rev in self.revs():
2882 node = self.node(rev)
2883 node = self.node(rev)
2883 p1, p2 = self.parents(node)
2884 p1, p2 = self.parents(node)
2884
2885
2885 if rev == censorrev:
2886 if rev == censorrev:
2886 newrl.addrawrevision(
2887 newrl.addrawrevision(
2887 tombstone,
2888 tombstone,
2888 tr,
2889 tr,
2889 self.linkrev(censorrev),
2890 self.linkrev(censorrev),
2890 p1,
2891 p1,
2891 p2,
2892 p2,
2892 censornode,
2893 censornode,
2893 REVIDX_ISCENSORED,
2894 REVIDX_ISCENSORED,
2894 )
2895 )
2895
2896
2896 if newrl.deltaparent(rev) != nullrev:
2897 if newrl.deltaparent(rev) != nullrev:
2897 raise error.Abort(
2898 raise error.Abort(
2898 _(
2899 _(
2899 b'censored revision stored as delta; '
2900 b'censored revision stored as delta; '
2900 b'cannot censor'
2901 b'cannot censor'
2901 ),
2902 ),
2902 hint=_(
2903 hint=_(
2903 b'censoring of revlogs is not '
2904 b'censoring of revlogs is not '
2904 b'fully implemented; please report '
2905 b'fully implemented; please report '
2905 b'this bug'
2906 b'this bug'
2906 ),
2907 ),
2907 )
2908 )
2908 continue
2909 continue
2909
2910
2910 if self.iscensored(rev):
2911 if self.iscensored(rev):
2911 if self.deltaparent(rev) != nullrev:
2912 if self.deltaparent(rev) != nullrev:
2912 raise error.Abort(
2913 raise error.Abort(
2913 _(
2914 _(
2914 b'cannot censor due to censored '
2915 b'cannot censor due to censored '
2915 b'revision having delta stored'
2916 b'revision having delta stored'
2916 )
2917 )
2917 )
2918 )
2918 rawtext = self._chunk(rev)
2919 rawtext = self._chunk(rev)
2919 else:
2920 else:
2920 rawtext = self.rawdata(rev)
2921 rawtext = self.rawdata(rev)
2921
2922
2922 newrl.addrawrevision(
2923 newrl.addrawrevision(
2923 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2924 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2924 )
2925 )
2925
2926
2926 tr.addbackup(self.indexfile, location=b'store')
2927 tr.addbackup(self.indexfile, location=b'store')
2927 if not self._inline:
2928 if not self._inline:
2928 tr.addbackup(self.datafile, location=b'store')
2929 tr.addbackup(self.datafile, location=b'store')
2929
2930
2930 self.opener.rename(newrl.indexfile, self.indexfile)
2931 self.opener.rename(newrl.indexfile, self.indexfile)
2931 if not self._inline:
2932 if not self._inline:
2932 self.opener.rename(newrl.datafile, self.datafile)
2933 self.opener.rename(newrl.datafile, self.datafile)
2933
2934
2934 self.clearcaches()
2935 self.clearcaches()
2935 self._loadindex()
2936 self._loadindex()
2936
2937
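A hedged sketch of invoking the censoring strategy above; `repo`, the file name, the node, and the tombstone are all hypothetical, and `repo.file()` / `repo.transaction()` are assumed to be the usual localrepo accessors:

    # Replace one file revision's content with a tombstone inside an
    # open transaction; the tombstone must not be longer than the
    # censored data (enforced by the length check in censorrevision).
    with repo.transaction(b'censor') as tr:
        fl = repo.file(b'secrets.txt')  # a censorable filelog
        fl.censorrevision(tr, censored_node, tombstone=b'removed per policy')
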
2937 def verifyintegrity(self, state):
2938 def verifyintegrity(self, state):
2938 """Verifies the integrity of the revlog.
2939 """Verifies the integrity of the revlog.
2939
2940
2940 Yields ``revlogproblem`` instances describing problems that are
2941 Yields ``revlogproblem`` instances describing problems that are
2941 found.
2942 found.
2942 """
2943 """
2943 dd, di = self.checksize()
2944 dd, di = self.checksize()
2944 if dd:
2945 if dd:
2945 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2946 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2946 if di:
2947 if di:
2947 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2948 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2948
2949
2949 version = self.version & 0xFFFF
2950 version = self.version & 0xFFFF
2950
2951
2951 # The verifier tells us what version revlog we should be.
2952 # The verifier tells us what version revlog we should be.
2952 if version != state[b'expectedversion']:
2953 if version != state[b'expectedversion']:
2953 yield revlogproblem(
2954 yield revlogproblem(
2954 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2955 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2955 % (self.indexfile, version, state[b'expectedversion'])
2956 % (self.indexfile, version, state[b'expectedversion'])
2956 )
2957 )
2957
2958
2958 state[b'skipread'] = set()
2959 state[b'skipread'] = set()
2959 state[b'safe_renamed'] = set()
2960 state[b'safe_renamed'] = set()
2960
2961
2961 for rev in self:
2962 for rev in self:
2962 node = self.node(rev)
2963 node = self.node(rev)
2963
2964
2964 # Verify contents. 4 cases to care about:
2965 # Verify contents. 4 cases to care about:
2965 #
2966 #
2966 # common: the most common case
2967 # common: the most common case
2967 # rename: with a rename
2968 # rename: with a rename
2968 # meta: file content starts with b'\1\n', the metadata
2969 # meta: file content starts with b'\1\n', the metadata
2969 # header defined in filelog.py, but without a rename
2970 # header defined in filelog.py, but without a rename
2970 # ext: content stored externally
2971 # ext: content stored externally
2971 #
2972 #
2972 # More formally, their differences are shown below:
2973 # More formally, their differences are shown below:
2973 #
2974 #
2974 # | common | rename | meta | ext
2975 # | common | rename | meta | ext
2975 # -------------------------------------------------------
2976 # -------------------------------------------------------
2976 # flags() | 0 | 0 | 0 | not 0
2977 # flags() | 0 | 0 | 0 | not 0
2977 # renamed() | False | True | False | ?
2978 # renamed() | False | True | False | ?
2978 # rawtext[0:2]=='\1\n'| False | True | True | ?
2979 # rawtext[0:2]=='\1\n'| False | True | True | ?
2979 #
2980 #
2980 # "rawtext" means the raw text stored in revlog data, which
2981 # "rawtext" means the raw text stored in revlog data, which
2981 # could be retrieved by "rawdata(rev)". "text"
2982 # could be retrieved by "rawdata(rev)". "text"
2982 # mentioned below is "revision(rev)".
2983 # mentioned below is "revision(rev)".
2983 #
2984 #
2984 # There are 3 different lengths stored physically:
2985 # There are 3 different lengths stored physically:
2985 # 1. L1: rawsize, stored in revlog index
2986 # 1. L1: rawsize, stored in revlog index
2986 # 2. L2: len(rawtext), stored in revlog data
2987 # 2. L2: len(rawtext), stored in revlog data
2987 # 3. L3: len(text), stored in revlog data if flags==0, or
2988 # 3. L3: len(text), stored in revlog data if flags==0, or
2988 # possibly somewhere else if flags!=0
2989 # possibly somewhere else if flags!=0
2989 #
2990 #
2990 # L1 should be equal to L2. L3 could be different from them.
2991 # L1 should be equal to L2. L3 could be different from them.
2991 # "text" may or may not affect commit hash depending on flag
2992 # "text" may or may not affect commit hash depending on flag
2992 # processors (see flagutil.addflagprocessor).
2993 # processors (see flagutil.addflagprocessor).
2993 #
2994 #
2994 # | common | rename | meta | ext
2995 # | common | rename | meta | ext
2995 # -------------------------------------------------
2996 # -------------------------------------------------
2996 # rawsize() | L1 | L1 | L1 | L1
2997 # rawsize() | L1 | L1 | L1 | L1
2997 # size() | L1 | L2-LM | L1(*) | L1 (?)
2998 # size() | L1 | L2-LM | L1(*) | L1 (?)
2998 # len(rawtext) | L2 | L2 | L2 | L2
2999 # len(rawtext) | L2 | L2 | L2 | L2
2999 # len(text) | L2 | L2 | L2 | L3
3000 # len(text) | L2 | L2 | L2 | L3
3000 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3001 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3001 #
3002 #
3002 # LM: length of metadata, depending on rawtext
3003 # LM: length of metadata, depending on rawtext
3003 # (*): not ideal, see comment in filelog.size
3004 # (*): not ideal, see comment in filelog.size
3004 # (?): could be "- len(meta)" if the resolved content has
3005 # (?): could be "- len(meta)" if the resolved content has
3005 # rename metadata
3006 # rename metadata
3006 #
3007 #
3007 # Checks needed to be done:
3008 # Checks needed to be done:
3008 # 1. length check: L1 == L2, in all cases.
3009 # 1. length check: L1 == L2, in all cases.
3009 # 2. hash check: depending on flag processor, we may need to
3010 # 2. hash check: depending on flag processor, we may need to
3010 # use either "text" (external), or "rawtext" (in revlog).
3011 # use either "text" (external), or "rawtext" (in revlog).
3011
3012
3012 try:
3013 try:
3013 skipflags = state.get(b'skipflags', 0)
3014 skipflags = state.get(b'skipflags', 0)
3014 if skipflags:
3015 if skipflags:
3015 skipflags &= self.flags(rev)
3016 skipflags &= self.flags(rev)
3016
3017
3017 _verify_revision(self, skipflags, state, node)
3018 _verify_revision(self, skipflags, state, node)
3018
3019
3019 l1 = self.rawsize(rev)
3020 l1 = self.rawsize(rev)
3020 l2 = len(self.rawdata(node))
3021 l2 = len(self.rawdata(node))
3021
3022
3022 if l1 != l2:
3023 if l1 != l2:
3023 yield revlogproblem(
3024 yield revlogproblem(
3024 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3025 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3025 node=node,
3026 node=node,
3026 )
3027 )
3027
3028
3028 except error.CensoredNodeError:
3029 except error.CensoredNodeError:
3029 if state[b'erroroncensored']:
3030 if state[b'erroroncensored']:
3030 yield revlogproblem(
3031 yield revlogproblem(
3031 error=_(b'censored file data'), node=node
3032 error=_(b'censored file data'), node=node
3032 )
3033 )
3033 state[b'skipread'].add(node)
3034 state[b'skipread'].add(node)
3034 except Exception as e:
3035 except Exception as e:
3035 yield revlogproblem(
3036 yield revlogproblem(
3036 error=_(b'unpacking %s: %s')
3037 error=_(b'unpacking %s: %s')
3037 % (short(node), stringutil.forcebytestr(e)),
3038 % (short(node), stringutil.forcebytestr(e)),
3038 node=node,
3039 node=node,
3039 )
3040 )
3040 state[b'skipread'].add(node)
3041 state[b'skipread'].add(node)
3041
3042
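A minimal sketch of driving the verifier above by hand; normally `hg verify` builds the `state` mapping, and only the keys the method consults are filled in here (assuming `rl` is a revlog instance and that `revlogproblem` exposes `error`/`warning`/`node` attributes):

    state = {
        b'expectedversion': rl.version & 0xFFFF,
        b'erroroncensored': True,
    }
    for problem in rl.verifyintegrity(state):
        # each yielded revlogproblem describes one issue found
        if problem.error is not None:
            print(problem.error)
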
3042 def storageinfo(
3043 def storageinfo(
3043 self,
3044 self,
3044 exclusivefiles=False,
3045 exclusivefiles=False,
3045 sharedfiles=False,
3046 sharedfiles=False,
3046 revisionscount=False,
3047 revisionscount=False,
3047 trackedsize=False,
3048 trackedsize=False,
3048 storedsize=False,
3049 storedsize=False,
3049 ):
3050 ):
3050 d = {}
3051 d = {}
3051
3052
3052 if exclusivefiles:
3053 if exclusivefiles:
3053 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3054 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3054 if not self._inline:
3055 if not self._inline:
3055 d[b'exclusivefiles'].append((self.opener, self.datafile))
3056 d[b'exclusivefiles'].append((self.opener, self.datafile))
3056
3057
3057 if sharedfiles:
3058 if sharedfiles:
3058 d[b'sharedfiles'] = []
3059 d[b'sharedfiles'] = []
3059
3060
3060 if revisionscount:
3061 if revisionscount:
3061 d[b'revisionscount'] = len(self)
3062 d[b'revisionscount'] = len(self)
3062
3063
3063 if trackedsize:
3064 if trackedsize:
3064 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3065 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3065
3066
3066 if storedsize:
3067 if storedsize:
3067 d[b'storedsize'] = sum(
3068 d[b'storedsize'] = sum(
3068 self.opener.stat(path).st_size for path in self.files()
3069 self.opener.stat(path).st_size for path in self.files()
3069 )
3070 )
3070
3071
3071 return d
3072 return d
3072
3073
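A small usage sketch of `storageinfo` above; each boolean argument requests the matching key in the returned dict (again assuming a revlog instance `rl`):

    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    # e.g. {b'revisionscount': 42, b'trackedsize': ..., b'storedsize': ...}
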
3073 def rewrite_sidedata(self, helpers, startrev, endrev):
3074 def rewrite_sidedata(self, helpers, startrev, endrev):
3074 if self.version & 0xFFFF != REVLOGV2:
3075 if self.version & 0xFFFF != REVLOGV2:
3075 return
3076 return
3076 # inline revlogs are not yet supported because they suffer from an issue when
3077 # inline revlogs are not yet supported because they suffer from an issue when
3077 # rewriting them (since it's not an append-only operation).
3078 # rewriting them (since it's not an append-only operation).
3078 # See issue6485.
3079 # See issue6485.
3079 assert not self._inline
3080 assert not self._inline
3080 if not helpers[1] and not helpers[2]:
3081 if not helpers[1] and not helpers[2]:
3081 # Nothing to generate or remove
3082 # Nothing to generate or remove
3082 return
3083 return
3083
3084
3084 new_entries = []
3085 new_entries = []
3085 # append the new sidedata
3086 # append the new sidedata
3086 with self._datafp(b'a+') as fp:
3087 with self._datafp(b'a+') as fp:
3087 # Maybe this bug still exists, see revlog._writeentry
3088 # Maybe this bug still exists, see revlog._writeentry
3088 fp.seek(0, os.SEEK_END)
3089 fp.seek(0, os.SEEK_END)
3089 current_offset = fp.tell()
3090 current_offset = fp.tell()
3090 for rev in range(startrev, endrev + 1):
3091 for rev in range(startrev, endrev + 1):
3091 entry = self.index[rev]
3092 entry = self.index[rev]
3092 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3093 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3093 store=self,
3094 store=self,
3094 sidedata_helpers=helpers,
3095 sidedata_helpers=helpers,
3095 sidedata={},
3096 sidedata={},
3096 rev=rev,
3097 rev=rev,
3097 )
3098 )
3098
3099
3099 serialized_sidedata = sidedatautil.serialize_sidedata(
3100 serialized_sidedata = sidedatautil.serialize_sidedata(
3100 new_sidedata
3101 new_sidedata
3101 )
3102 )
3102 if entry[8] != 0 or entry[9] != 0:
3103 if entry[8] != 0 or entry[9] != 0:
3103 # rewriting entries that already have sidedata is not
3104 # rewriting entries that already have sidedata is not
3104 # supported yet, because it introduces garbage data in the
3105 # supported yet, because it introduces garbage data in the
3105 # revlog.
3106 # revlog.
3106 msg = b"Rewriting existing sidedata is not supported yet"
3107 msg = b"Rewriting existing sidedata is not supported yet"
3107 raise error.Abort(msg)
3108 raise error.Abort(msg)
3108
3109
3109 # Apply (potential) flags to add and to remove after running
3110 # Apply (potential) flags to add and to remove after running
3110 # the sidedata helpers
3111 # the sidedata helpers
3111 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3112 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3112 entry = (new_offset_flags,) + entry[1:8]
3113 entry = (new_offset_flags,) + entry[1:8]
3113 entry += (current_offset, len(serialized_sidedata))
3114 entry += (current_offset, len(serialized_sidedata))
3114
3115
3115 fp.write(serialized_sidedata)
3116 fp.write(serialized_sidedata)
3116 new_entries.append(entry)
3117 new_entries.append(entry)
3117 current_offset += len(serialized_sidedata)
3118 current_offset += len(serialized_sidedata)
3118
3119
3119 # rewrite the new index entries
3120 # rewrite the new index entries
3120 with self._indexfp(b'w+') as fp:
3121 with self._indexfp(b'w+') as fp:
3121 fp.seek(startrev * self.index.entry_size)
3122 fp.seek(startrev * self.index.entry_size)
3122 for i, e in enumerate(new_entries):
3123 for i, e in enumerate(new_entries):
3123 rev = startrev + i
3124 rev = startrev + i
3124 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3125 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3125 packed = self.index.entry_binary(rev)
3126 packed = self.index.entry_binary(rev)
3126 if rev == 0:
3127 if rev == 0:
3127 header = self.index.pack_header(self.version)
3128 header = self.index.pack_header(self.version)
3128 packed = header + packed
3129 packed = header + packed
3129 fp.write(packed)
3130 fp.write(packed)
@@ -1,155 +1,175 b''
1 # sidedata.py - Logic around store extra data alongside revlog revisions
1 # sidedata.py - Logic around store extra data alongside revlog revisions
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 """core code for "sidedata" support
7 """core code for "sidedata" support
8
8
9 The "sidedata" are stored alongside the revision without actually being part of
9 The "sidedata" are stored alongside the revision without actually being part of
10 its content and without affecting its hash. Its main use case is to cache
10 its content and without affecting its hash. Its main use case is to cache
11 important information related to a changeset.
11 important information related to a changeset.
12
12
13 The current implementation is experimental and subject to change. Do not rely
13 The current implementation is experimental and subject to change. Do not rely
14 on it in production.
14 on it in production.
15
15
16 Sidedata are stored in the revlog itself, thanks to a new version of the
16 Sidedata are stored in the revlog itself, thanks to a new version of the
17 revlog. The following format is currently used::
17 revlog. The following format is currently used::
18
18
19 initial header:
19 initial header:
20 <number of sidedata; 2 bytes>
20 <number of sidedata; 2 bytes>
21 sidedata (repeated N times):
21 sidedata (repeated N times):
22 <sidedata-key; 2 bytes>
22 <sidedata-key; 2 bytes>
23 <sidedata-entry-length; 4 bytes>
23 <sidedata-entry-length; 4 bytes>
24 <sidedata-content-sha1-digest; 20 bytes>
24 <sidedata-content-sha1-digest; 20 bytes>
25 <sidedata-content; X bytes>
25 <sidedata-content; X bytes>
26 normal raw text:
26 normal raw text:
27 <all bytes remaining in the rawtext>
27 <all bytes remaining in the rawtext>
28
28
29 This is a simple and effective format. It should be enough to experiment with
29 This is a simple and effective format. It should be enough to experiment with
30 the concept.
30 the concept.
31 """
31 """
32
32
33 from __future__ import absolute_import
33 from __future__ import absolute_import
34
34
35 import collections
35 import collections
36 import struct
36 import struct
37
37
38 from .. import error, requirements as requirementsmod
38 from .. import error, requirements as requirementsmod
39 from ..revlogutils import constants, flagutil
39 from ..revlogutils import constants, flagutil
40 from ..utils import hashutil
40 from ..utils import hashutil
41
41
42 ## sidedata type constant
42 ## sidedata type constant
43 # reserve a block for testing purposes.
43 # reserve a block for testing purposes.
44 SD_TEST1 = 1
44 SD_TEST1 = 1
45 SD_TEST2 = 2
45 SD_TEST2 = 2
46 SD_TEST3 = 3
46 SD_TEST3 = 3
47 SD_TEST4 = 4
47 SD_TEST4 = 4
48 SD_TEST5 = 5
48 SD_TEST5 = 5
49 SD_TEST6 = 6
49 SD_TEST6 = 6
50 SD_TEST7 = 7
50 SD_TEST7 = 7
51
51
52 # key to store copies related information
52 # key to store copies related information
53 SD_P1COPIES = 8
53 SD_P1COPIES = 8
54 SD_P2COPIES = 9
54 SD_P2COPIES = 9
55 SD_FILESADDED = 10
55 SD_FILESADDED = 10
56 SD_FILESREMOVED = 11
56 SD_FILESREMOVED = 11
57 SD_FILES = 12
57 SD_FILES = 12
58
58
59 # internal format constant
59 # internal format constant
60 SIDEDATA_HEADER = struct.Struct('>H')
60 SIDEDATA_HEADER = struct.Struct('>H')
61 SIDEDATA_ENTRY = struct.Struct('>HL20s')
61 SIDEDATA_ENTRY = struct.Struct('>HL20s')
62
62
63
63
64 def serialize_sidedata(sidedata):
64 def serialize_sidedata(sidedata):
65 sidedata = list(sidedata.items())
65 sidedata = list(sidedata.items())
66 sidedata.sort()
66 sidedata.sort()
67 buf = [SIDEDATA_HEADER.pack(len(sidedata))]
67 buf = [SIDEDATA_HEADER.pack(len(sidedata))]
68 for key, value in sidedata:
68 for key, value in sidedata:
69 digest = hashutil.sha1(value).digest()
69 digest = hashutil.sha1(value).digest()
70 buf.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
70 buf.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
71 for key, value in sidedata:
71 for key, value in sidedata:
72 buf.append(value)
72 buf.append(value)
73 buf = b''.join(buf)
73 buf = b''.join(buf)
74 return buf
74 return buf
75
75
76
76
77 def deserialize_sidedata(blob):
77 def deserialize_sidedata(blob):
78 sidedata = {}
78 sidedata = {}
79 offset = 0
79 offset = 0
80 (nbentry,) = SIDEDATA_HEADER.unpack(blob[: SIDEDATA_HEADER.size])
80 (nbentry,) = SIDEDATA_HEADER.unpack(blob[: SIDEDATA_HEADER.size])
81 offset += SIDEDATA_HEADER.size
81 offset += SIDEDATA_HEADER.size
82 dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
82 dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
83 for i in range(nbentry):
83 for i in range(nbentry):
84 nextoffset = offset + SIDEDATA_ENTRY.size
84 nextoffset = offset + SIDEDATA_ENTRY.size
85 key, size, storeddigest = SIDEDATA_ENTRY.unpack(blob[offset:nextoffset])
85 key, size, storeddigest = SIDEDATA_ENTRY.unpack(blob[offset:nextoffset])
86 offset = nextoffset
86 offset = nextoffset
87 # read the data associated with that entry
87 # read the data associated with that entry
88 nextdataoffset = dataoffset + size
88 nextdataoffset = dataoffset + size
89 entrytext = bytes(blob[dataoffset:nextdataoffset])
89 entrytext = bytes(blob[dataoffset:nextdataoffset])
90 readdigest = hashutil.sha1(entrytext).digest()
90 readdigest = hashutil.sha1(entrytext).digest()
91 if storeddigest != readdigest:
91 if storeddigest != readdigest:
92 raise error.SidedataHashError(key, storeddigest, readdigest)
92 raise error.SidedataHashError(key, storeddigest, readdigest)
93 sidedata[key] = entrytext
93 sidedata[key] = entrytext
94 dataoffset = nextdataoffset
94 dataoffset = nextdataoffset
95 return sidedata
95 return sidedata
96
96
97
97
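A round-trip sketch of the layout described in the module docstring, using the pair of functions above (the payloads are made up; the sizes follow SIDEDATA_HEADER and SIDEDATA_ENTRY):

    from mercurial.revlogutils import sidedata as sidedatamod

    blob = sidedatamod.serialize_sidedata(
        {sidedatamod.SD_TEST1: b'spam', sidedatamod.SD_TEST2: b'eggs'}
    )
    assert blob[:2] == b'\x00\x02'  # 2-byte count header: two entries
    # one 26-byte (2 + 4 + 20) descriptor per entry, then the payloads
    assert len(blob) == 2 + 2 * 26 + len(b'spam') + len(b'eggs')
    assert sidedatamod.deserialize_sidedata(blob) == {
        sidedatamod.SD_TEST1: b'spam',
        sidedatamod.SD_TEST2: b'eggs',
    }
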
98 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
98 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
99 """
100 Returns a dictionary mapping revlog types to tuples of
101 `(repo, computers, removers)`:
102 * `repo` is used as an argument for computers
103 * `computers` is a list of `(category, (keys, computer, flags)` that
104 compute the missing sidedata categories that were asked:
105 * `category` is the sidedata category
106 * `keys` are the sidedata keys to be affected
107 * `flags` is a bitmask (an integer) of flags to remove when
108 removing the category.
109 * `computer` is the function `(repo, store, rev, sidedata)` that
110 returns a tuple of
111 `(new sidedata dict, (flags to add, flags to remove))`.
112 For example, it will return `({}, (0, 1 << 15))` to return no
113 sidedata, with no flags to add and one flag to remove.
114 * `removers` will remove the keys corresponding to the categories
115 that are present, but not needed.
116 If both `computers` and `removers` are empty, sidedata will simply not
117 be transformed.
118 """
99 # Computers for computing sidedata on-the-fly
119 # Computers for computing sidedata on-the-fly
100 sd_computers = collections.defaultdict(list)
120 sd_computers = collections.defaultdict(list)
101 # Computers for categories to remove from sidedata
121 # Computers for categories to remove from sidedata
102 sd_removers = collections.defaultdict(list)
122 sd_removers = collections.defaultdict(list)
103 to_generate = remote_sd_categories - repo._wanted_sidedata
123 to_generate = remote_sd_categories - repo._wanted_sidedata
104 to_remove = repo._wanted_sidedata - remote_sd_categories
124 to_remove = repo._wanted_sidedata - remote_sd_categories
105 if pull:
125 if pull:
106 to_generate, to_remove = to_remove, to_generate
126 to_generate, to_remove = to_remove, to_generate
107
127
108 for revlog_kind, computers in repo._sidedata_computers.items():
128 for revlog_kind, computers in repo._sidedata_computers.items():
109 for category, computer in computers.items():
129 for category, computer in computers.items():
110 if category in to_generate:
130 if category in to_generate:
111 sd_computers[revlog_kind].append(computer)
131 sd_computers[revlog_kind].append(computer)
112 if category in to_remove:
132 if category in to_remove:
113 sd_removers[revlog_kind].append(computer)
133 sd_removers[revlog_kind].append(computer)
114
134
115 sidedata_helpers = (repo, sd_computers, sd_removers)
135 sidedata_helpers = (repo, sd_computers, sd_removers)
116 return sidedata_helpers
136 return sidedata_helpers
117
137
118
138
119 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
139 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
120 """Returns the sidedata for the given revision after running through
140 """Returns the sidedata for the given revision after running through
121 the given helpers.
141 the given helpers.
122 - `store`: the revlog this applies to (changelog, manifest, or filelog
142 - `store`: the revlog this applies to (changelog, manifest, or filelog
123 instance)
143 instance)
124 - `sidedata_helpers`: see `storageutil.emitrevisions`
144 - `sidedata_helpers`: see `get_sidedata_helpers`
125 - `sidedata`: previous sidedata at the given rev, if any
145 - `sidedata`: previous sidedata at the given rev, if any
126 - `rev`: affected rev of `store`
146 - `rev`: affected rev of `store`
127 """
147 """
128 repo, sd_computers, sd_removers = sidedata_helpers
148 repo, sd_computers, sd_removers = sidedata_helpers
129 kind = store.revlog_kind
149 kind = store.revlog_kind
130 flags_to_add = 0
150 flags_to_add = 0
131 flags_to_remove = 0
151 flags_to_remove = 0
132 for _keys, sd_computer, _flags in sd_computers.get(kind, []):
152 for _keys, sd_computer, _flags in sd_computers.get(kind, []):
133 sidedata, flags = sd_computer(repo, store, rev, sidedata)
153 sidedata, flags = sd_computer(repo, store, rev, sidedata)
134 flags_to_add |= flags[0]
154 flags_to_add |= flags[0]
135 flags_to_remove |= flags[1]
155 flags_to_remove |= flags[1]
136 for keys, _computer, flags in sd_removers.get(kind, []):
156 for keys, _computer, flags in sd_removers.get(kind, []):
137 for key in keys:
157 for key in keys:
138 sidedata.pop(key, None)
158 sidedata.pop(key, None)
139 flags_to_remove |= flags
159 flags_to_remove |= flags
140 return sidedata, (flags_to_add, flags_to_remove)
160 return sidedata, (flags_to_add, flags_to_remove)
141
161
142
162
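A sketch of what one computer entry looks like, matching the `(keys, computer, flags)` shape unpacked by `run_sidedata_helpers` above; the flag bit and payload are hypothetical, and SD_TEST1 is this module's test key:

    HYPOTHETICAL_FLAG = 1 << 15  # made-up flag bit, for illustration only


    def example_computer(repo, store, rev, sidedata):
        # compute one sidedata entry for `rev`; returning
        # `({}, (0, HYPOTHETICAL_FLAG))` instead would emit no sidedata
        # and ask for the flag to be removed
        sidedata[SD_TEST1] = b'computed for rev %d' % rev
        return sidedata, (HYPOTHETICAL_FLAG, 0)


    # the shape consumed by run_sidedata_helpers:
    entry = ((SD_TEST1,), example_computer, HYPOTHETICAL_FLAG)
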
143 def set_sidedata_spec_for_repo(repo):
163 def set_sidedata_spec_for_repo(repo):
144 # prevent cycle metadata -> revlogutils.sidedata -> metadata
164 # prevent cycle metadata -> revlogutils.sidedata -> metadata
145 from .. import metadata
165 from .. import metadata
146
166
147 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
167 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
148 repo.register_wanted_sidedata(SD_FILES)
168 repo.register_wanted_sidedata(SD_FILES)
149 repo.register_sidedata_computer(
169 repo.register_sidedata_computer(
150 constants.KIND_CHANGELOG,
170 constants.KIND_CHANGELOG,
151 SD_FILES,
171 SD_FILES,
152 (SD_FILES,),
172 (SD_FILES,),
153 metadata.copies_sidedata_computer,
173 metadata.copies_sidedata_computer,
154 flagutil.REVIDX_HASCOPIESINFO,
174 flagutil.REVIDX_HASCOPIESINFO,
155 )
175 )
@@ -1,561 +1,544 b''
1 # storageutil.py - Storage functionality agnostic of backend implementation.
1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import re
10 import re
11 import struct
11 import struct
12
12
13 from ..i18n import _
13 from ..i18n import _
14 from ..node import (
14 from ..node import (
15 bin,
15 bin,
16 nullrev,
16 nullrev,
17 sha1nodeconstants,
17 sha1nodeconstants,
18 )
18 )
19 from .. import (
19 from .. import (
20 dagop,
20 dagop,
21 error,
21 error,
22 mdiff,
22 mdiff,
23 pycompat,
23 pycompat,
24 )
24 )
25 from ..interfaces import repository
25 from ..interfaces import repository
26 from ..revlogutils import sidedata as sidedatamod
26 from ..revlogutils import sidedata as sidedatamod
27 from ..utils import hashutil
27 from ..utils import hashutil
28
28
29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
30
30
31 # revision data contains extra metadata not part of the official digest
31 # revision data contains extra metadata not part of the official digest
32 # Only used in changegroup >= v4.
32 # Only used in changegroup >= v4.
33 CG_FLAG_SIDEDATA = 1
33 CG_FLAG_SIDEDATA = 1
34
34
35
35
36 def hashrevisionsha1(text, p1, p2):
36 def hashrevisionsha1(text, p1, p2):
37 """Compute the SHA-1 for revision data and its parents.
37 """Compute the SHA-1 for revision data and its parents.
38
38
39 This hash combines both the current file contents and its history
39 This hash combines both the current file contents and its history
40 in a manner that makes it easy to distinguish nodes with the same
40 in a manner that makes it easy to distinguish nodes with the same
41 content in the revision graph.
41 content in the revision graph.
42 """
42 """
43 # As of now, if one of the parent node is null, p2 is null
43 # As of now, if one of the parent node is null, p2 is null
44 if p2 == sha1nodeconstants.nullid:
44 if p2 == sha1nodeconstants.nullid:
45 # deep copy of a hash is faster than creating one
45 # deep copy of a hash is faster than creating one
46 s = _nullhash.copy()
46 s = _nullhash.copy()
47 s.update(p1)
47 s.update(p1)
48 else:
48 else:
49 # none of the parent nodes are nullid
49 # none of the parent nodes are nullid
50 if p1 < p2:
50 if p1 < p2:
51 a = p1
51 a = p1
52 b = p2
52 b = p2
53 else:
53 else:
54 a = p2
54 a = p2
55 b = p1
55 b = p1
56 s = hashutil.sha1(a)
56 s = hashutil.sha1(a)
57 s.update(b)
57 s.update(b)
58 s.update(text)
58 s.update(text)
59 return s.digest()
59 return s.digest()
60
60
61
61
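A standalone check of the hashing rule above; it assumes only `hashlib` and 20-byte SHA-1 nodes, and relies on the null node (20 zero bytes) always sorting before a real node:

    import hashlib

    nullid = b'\x00' * 20
    p1 = hashlib.sha1(b'parent one').digest()
    text = b'revision fulltext'

    # with a null p2 the digest is sha1(nullid + p1 + text)
    expected = hashlib.sha1(nullid + p1 + text).digest()
    assert hashrevisionsha1(text, p1, nullid) == expected
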
62 METADATA_RE = re.compile(b'\x01\n')
62 METADATA_RE = re.compile(b'\x01\n')
63
63
64
64
65 def parsemeta(text):
65 def parsemeta(text):
66 """Parse metadata header from revision data.
66 """Parse metadata header from revision data.
67
67
68 Returns a 2-tuple of (metadata, offset), where both can be None if there
68 Returns a 2-tuple of (metadata, offset), where both can be None if there
69 is no metadata.
69 is no metadata.
70 """
70 """
71 # text can be buffer, so we can't use .startswith or .index
71 # text can be buffer, so we can't use .startswith or .index
72 if text[:2] != b'\x01\n':
72 if text[:2] != b'\x01\n':
73 return None, None
73 return None, None
74 s = METADATA_RE.search(text, 2).start()
74 s = METADATA_RE.search(text, 2).start()
75 mtext = text[2:s]
75 mtext = text[2:s]
76 meta = {}
76 meta = {}
77 for l in mtext.splitlines():
77 for l in mtext.splitlines():
78 k, v = l.split(b': ', 1)
78 k, v = l.split(b': ', 1)
79 meta[k] = v
79 meta[k] = v
80 return meta, s + 2
80 return meta, s + 2
81
81
82
82
83 def packmeta(meta, text):
83 def packmeta(meta, text):
84 """Add metadata to fulltext to produce revision text."""
84 """Add metadata to fulltext to produce revision text."""
85 keys = sorted(meta)
85 keys = sorted(meta)
86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
87 return b'\x01\n%s\x01\n%s' % (metatext, text)
87 return b'\x01\n%s\x01\n%s' % (metatext, text)
88
88
89
89
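A round trip through the `\x01\n` metadata framing implemented by `parsemeta`/`packmeta` above (the copy metadata values are made up):

    meta = {b'copy': b'old-name.txt', b'copyrev': b'0' * 40}
    revtext = packmeta(meta, b'file content')
    assert revtext.startswith(b'\x01\n')

    parsed, offset = parsemeta(revtext)
    assert parsed == meta
    assert revtext[offset:] == b'file content'
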
90 def iscensoredtext(text):
90 def iscensoredtext(text):
91 meta = parsemeta(text)[0]
91 meta = parsemeta(text)[0]
92 return meta and b'censored' in meta
92 return meta and b'censored' in meta
93
93
94
94
95 def filtermetadata(text):
95 def filtermetadata(text):
96 """Extract just the revision data from source text.
96 """Extract just the revision data from source text.
97
97
98 Returns ``text`` unless it has a metadata header, in which case we return
98 Returns ``text`` unless it has a metadata header, in which case we return
99 a new buffer without the metadata.
99 a new buffer without the metadata.
100 """
100 """
101 if not text.startswith(b'\x01\n'):
101 if not text.startswith(b'\x01\n'):
102 return text
102 return text
103
103
104 offset = text.index(b'\x01\n', 2)
104 offset = text.index(b'\x01\n', 2)
105 return text[offset + 2 :]
105 return text[offset + 2 :]
106
106
107
107
108 def filerevisioncopied(store, node):
108 def filerevisioncopied(store, node):
109 """Resolve file revision copy metadata.
109 """Resolve file revision copy metadata.
110
110
111 Returns ``False`` if the file has no copy metadata. Otherwise a
111 Returns ``False`` if the file has no copy metadata. Otherwise a
112 2-tuple of the source filename and node.
112 2-tuple of the source filename and node.
113 """
113 """
114 if store.parents(node)[0] != sha1nodeconstants.nullid:
114 if store.parents(node)[0] != sha1nodeconstants.nullid:
115 return False
115 return False
116
116
117 meta = parsemeta(store.revision(node))[0]
117 meta = parsemeta(store.revision(node))[0]
118
118
119 # copy and copyrev occur in pairs. In rare cases due to old bugs,
119 # copy and copyrev occur in pairs. In rare cases due to old bugs,
120 # one can occur without the other. So ensure both are present to flag
120 # one can occur without the other. So ensure both are present to flag
121 # as a copy.
121 # as a copy.
122 if meta and b'copy' in meta and b'copyrev' in meta:
122 if meta and b'copy' in meta and b'copyrev' in meta:
123 return meta[b'copy'], bin(meta[b'copyrev'])
123 return meta[b'copy'], bin(meta[b'copyrev'])
124
124
125 return False
125 return False
126
126
127
127
128 def filedataequivalent(store, node, filedata):
128 def filedataequivalent(store, node, filedata):
129 """Determines whether file data is equivalent to a stored node.
129 """Determines whether file data is equivalent to a stored node.
130
130
131 Returns True if the passed file data would hash to the same value
131 Returns True if the passed file data would hash to the same value
132 as a stored revision and False otherwise.
132 as a stored revision and False otherwise.
133
133
134 When a stored revision is censored, filedata must be empty to have
134 When a stored revision is censored, filedata must be empty to have
135 equivalence.
135 equivalence.
136
136
137 When a stored revision has copy metadata, it is ignored as part
137 When a stored revision has copy metadata, it is ignored as part
138 of the compare.
138 of the compare.
139 """
139 """
140
140
141 if filedata.startswith(b'\x01\n'):
141 if filedata.startswith(b'\x01\n'):
142 revisiontext = b'\x01\n\x01\n' + filedata
142 revisiontext = b'\x01\n\x01\n' + filedata
143 else:
143 else:
144 revisiontext = filedata
144 revisiontext = filedata
145
145
146 p1, p2 = store.parents(node)
146 p1, p2 = store.parents(node)
147
147
148 computednode = hashrevisionsha1(revisiontext, p1, p2)
148 computednode = hashrevisionsha1(revisiontext, p1, p2)
149
149
150 if computednode == node:
150 if computednode == node:
151 return True
151 return True
152
152
153 # Censored files compare against the empty file.
153 # Censored files compare against the empty file.
154 if store.iscensored(store.rev(node)):
154 if store.iscensored(store.rev(node)):
155 return filedata == b''
155 return filedata == b''
156
156
157 # Renaming a file produces a different hash, even if the data
157 # Renaming a file produces a different hash, even if the data
158 # remains unchanged. Check if that's the case.
158 # remains unchanged. Check if that's the case.
159 if store.renamed(node):
159 if store.renamed(node):
160 return store.read(node) == filedata
160 return store.read(node) == filedata
161
161
162 return False
162 return False
163
163
164
164
165 def iterrevs(storelen, start=0, stop=None):
165 def iterrevs(storelen, start=0, stop=None):
166 """Iterate over revision numbers in a store."""
166 """Iterate over revision numbers in a store."""
167 step = 1
167 step = 1
168
168
169 if stop is not None:
169 if stop is not None:
170 if start > stop:
170 if start > stop:
171 step = -1
171 step = -1
172 stop += step
172 stop += step
173 if stop > storelen:
173 if stop > storelen:
174 stop = storelen
174 stop = storelen
175 else:
175 else:
176 stop = storelen
176 stop = storelen
177
177
178 return pycompat.xrange(start, stop, step)
178 return pycompat.xrange(start, stop, step)
179
179
180
180
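A few worked cases for `iterrevs` above; note that an explicit `stop` is inclusive and that a `start` greater than `stop` iterates downward:

    assert list(iterrevs(5)) == [0, 1, 2, 3, 4]             # whole store
    assert list(iterrevs(5, start=1, stop=3)) == [1, 2, 3]  # stop inclusive
    assert list(iterrevs(5, start=3, stop=1)) == [3, 2, 1]  # descending
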
181 def fileidlookup(store, fileid, identifier):
181 def fileidlookup(store, fileid, identifier):
182 """Resolve the file node for a value.
182 """Resolve the file node for a value.
183
183
184 ``store`` is an object implementing the ``ifileindex`` interface.
184 ``store`` is an object implementing the ``ifileindex`` interface.
185
185
186 ``fileid`` can be:
186 ``fileid`` can be:
187
187
188 * A 20 or 32 byte binary node.
188 * A 20 or 32 byte binary node.
189 * An integer revision number
189 * An integer revision number
190 * A 40 or 64 byte hex node.
190 * A 40 or 64 byte hex node.
191 * A bytes that can be parsed as an integer representing a revision number.
191 * A bytes that can be parsed as an integer representing a revision number.
192
192
193 ``identifier`` is used to populate ``error.LookupError`` with an identifier
193 ``identifier`` is used to populate ``error.LookupError`` with an identifier
194 for the store.
194 for the store.
195
195
196 Raises ``error.LookupError`` on failure.
196 Raises ``error.LookupError`` on failure.
197 """
197 """
198 if isinstance(fileid, int):
198 if isinstance(fileid, int):
199 try:
199 try:
200 return store.node(fileid)
200 return store.node(fileid)
201 except IndexError:
201 except IndexError:
202 raise error.LookupError(
202 raise error.LookupError(
203 b'%d' % fileid, identifier, _(b'no match found')
203 b'%d' % fileid, identifier, _(b'no match found')
204 )
204 )
205
205
206 if len(fileid) in (20, 32):
206 if len(fileid) in (20, 32):
207 try:
207 try:
208 store.rev(fileid)
208 store.rev(fileid)
209 return fileid
209 return fileid
210 except error.LookupError:
210 except error.LookupError:
211 pass
211 pass
212
212
213 if len(fileid) in (40, 64):
213 if len(fileid) in (40, 64):
214 try:
214 try:
215 rawnode = bin(fileid)
215 rawnode = bin(fileid)
216 store.rev(rawnode)
216 store.rev(rawnode)
217 return rawnode
217 return rawnode
218 except TypeError:
218 except TypeError:
219 pass
219 pass
220
220
221 try:
221 try:
222 rev = int(fileid)
222 rev = int(fileid)
223
223
224 if b'%d' % rev != fileid:
224 if b'%d' % rev != fileid:
225 raise ValueError
225 raise ValueError
226
226
227 try:
227 try:
228 return store.node(rev)
228 return store.node(rev)
229 except (IndexError, TypeError):
229 except (IndexError, TypeError):
230 pass
230 pass
231 except (ValueError, OverflowError):
231 except (ValueError, OverflowError):
232 pass
232 pass
233
233
234 raise error.LookupError(fileid, identifier, _(b'no match found'))
234 raise error.LookupError(fileid, identifier, _(b'no match found'))
235
235
236
236
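A sketch of the accepted identifier forms documented above; `filelog` stands in for any `ifileindex` store and is assumed to have at least one revision:

    from mercurial.node import hex

    node = filelog.node(0)
    assert fileidlookup(filelog, node, b'path') == node       # binary node
    assert fileidlookup(filelog, 0, b'path') == node          # int revnum
    assert fileidlookup(filelog, hex(node), b'path') == node  # hex node
    assert fileidlookup(filelog, b'0', b'path') == node       # revnum bytes
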
237 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
237 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
238 """Resolve information needed to strip revisions.
238 """Resolve information needed to strip revisions.
239
239
240 Finds the minimum revision number that must be stripped in order to
240 Finds the minimum revision number that must be stripped in order to
241 strip ``minlinkrev``.
241 strip ``minlinkrev``.
242
242
243 Returns a 2-tuple of the minimum revision number to do that and a set
243 Returns a 2-tuple of the minimum revision number to do that and a set
244 of all revision numbers that have linkrevs that would be broken
244 of all revision numbers that have linkrevs that would be broken
245 by that strip.
245 by that strip.
246
246
247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
248 ``headrevs`` is an iterable of head revisions.
248 ``headrevs`` is an iterable of head revisions.
249 ``linkrevfn`` is a callable that receives a revision and returns a linked
249 ``linkrevfn`` is a callable that receives a revision and returns a linked
250 revision.
250 revision.
251 ``parentrevsfn`` is a callable that receives a revision number and returns
251 ``parentrevsfn`` is a callable that receives a revision number and returns
252 an iterable of its parent revision numbers.
252 an iterable of its parent revision numbers.
253 """
253 """
254 brokenrevs = set()
254 brokenrevs = set()
255 strippoint = tiprev + 1
255 strippoint = tiprev + 1
256
256
257 heads = {}
257 heads = {}
258 futurelargelinkrevs = set()
258 futurelargelinkrevs = set()
259 for head in headrevs:
259 for head in headrevs:
260 headlinkrev = linkrevfn(head)
260 headlinkrev = linkrevfn(head)
261 heads[head] = headlinkrev
261 heads[head] = headlinkrev
262 if headlinkrev >= minlinkrev:
262 if headlinkrev >= minlinkrev:
263 futurelargelinkrevs.add(headlinkrev)
263 futurelargelinkrevs.add(headlinkrev)
264
264
265 # This algorithm involves walking down the rev graph, starting at the
265 # This algorithm involves walking down the rev graph, starting at the
266 # heads. Since the revs are topologically sorted according to linkrev,
266 # heads. Since the revs are topologically sorted according to linkrev,
267 # once all head linkrevs are below the minlink, we know there are
267 # once all head linkrevs are below the minlink, we know there are
268 # no more revs that could have a linkrev greater than minlink.
268 # no more revs that could have a linkrev greater than minlink.
269 # So we can stop walking.
269 # So we can stop walking.
270 while futurelargelinkrevs:
270 while futurelargelinkrevs:
271 strippoint -= 1
271 strippoint -= 1
272 linkrev = heads.pop(strippoint)
272 linkrev = heads.pop(strippoint)
273
273
274 if linkrev < minlinkrev:
274 if linkrev < minlinkrev:
275 brokenrevs.add(strippoint)
275 brokenrevs.add(strippoint)
276 else:
276 else:
277 futurelargelinkrevs.remove(linkrev)
277 futurelargelinkrevs.remove(linkrev)
278
278
279 for p in parentrevsfn(strippoint):
279 for p in parentrevsfn(strippoint):
280 if p != nullrev:
280 if p != nullrev:
281 plinkrev = linkrevfn(p)
281 plinkrev = linkrevfn(p)
282 heads[p] = plinkrev
282 heads[p] = plinkrev
283 if plinkrev >= minlinkrev:
283 if plinkrev >= minlinkrev:
284 futurelargelinkrevs.add(plinkrev)
284 futurelargelinkrevs.add(plinkrev)
285
285
286 return strippoint, brokenrevs
286 return strippoint, brokenrevs
287
287
288
288
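A tiny worked example of the strip computation above, on a hypothetical linear store of four revisions whose linkrevs are 0..3 (nullrev is -1):

    linkrevs = [0, 1, 2, 3]
    parents = {0: (-1,), 1: (0,), 2: (1,), 3: (2,)}

    strippoint, broken = resolvestripinfo(
        2,    # minlinkrev: strip everything linked to changelog rev >= 2
        3,    # tiprev
        [3],  # headrevs
        lambda r: linkrevs[r],
        lambda r: parents[r],
    )
    assert strippoint == 2   # revisions 2 and 3 must be stripped
    assert broken == set()   # no surviving revision ends up broken
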
289 def emitrevisions(
289 def emitrevisions(
290 store,
290 store,
291 nodes,
291 nodes,
292 nodesorder,
292 nodesorder,
293 resultcls,
293 resultcls,
294 deltaparentfn=None,
294 deltaparentfn=None,
295 candeltafn=None,
295 candeltafn=None,
296 rawsizefn=None,
296 rawsizefn=None,
297 revdifffn=None,
297 revdifffn=None,
298 flagsfn=None,
298 flagsfn=None,
299 deltamode=repository.CG_DELTAMODE_STD,
299 deltamode=repository.CG_DELTAMODE_STD,
300 revisiondata=False,
300 revisiondata=False,
301 assumehaveparentrevisions=False,
301 assumehaveparentrevisions=False,
302 sidedata_helpers=None,
302 sidedata_helpers=None,
303 ):
303 ):
304 """Generic implementation of ifiledata.emitrevisions().
304 """Generic implementation of ifiledata.emitrevisions().
305
305
306 Emitting revision data is subtly complex. This function attempts to
306 Emitting revision data is subtly complex. This function attempts to
307 encapsulate all the logic for doing so in a backend-agnostic way.
307 encapsulate all the logic for doing so in a backend-agnostic way.
308
308
309 ``store``
309 ``store``
310 Object conforming to ``ifilestorage`` interface.
310 Object conforming to ``ifilestorage`` interface.
311
311
312 ``nodes``
312 ``nodes``
313 List of revision nodes whose data to emit.
313 List of revision nodes whose data to emit.
314
314
315 ``resultcls``
315 ``resultcls``
316 A type implementing the ``irevisiondelta`` interface that will be
316 A type implementing the ``irevisiondelta`` interface that will be
317 constructed and returned.
317 constructed and returned.
318
318
319 ``deltaparentfn`` (optional)
319 ``deltaparentfn`` (optional)
320 Callable receiving a revision number and returning the revision number
320 Callable receiving a revision number and returning the revision number
321 of a revision that the internal delta is stored against. This delta
321 of a revision that the internal delta is stored against. This delta
322 will be preferred over computing a new arbitrary delta.
322 will be preferred over computing a new arbitrary delta.
323
323
324 If not defined, a delta will always be computed from raw revision
324 If not defined, a delta will always be computed from raw revision
325 data.
325 data.
326
326
327 ``candeltafn`` (optional)
327 ``candeltafn`` (optional)
328 Callable receiving a pair of revision numbers that returns a bool
328 Callable receiving a pair of revision numbers that returns a bool
329 indicating whether a delta between them can be produced.
329 indicating whether a delta between them can be produced.
330
330
331 If not defined, it is assumed that any two revisions can delta with
331 If not defined, it is assumed that any two revisions can delta with
332 each other.
332 each other.
333
333
334 ``rawsizefn`` (optional)
334 ``rawsizefn`` (optional)
335 Callable receiving a revision number and returning the length of the
335 Callable receiving a revision number and returning the length of the
336 ``store.rawdata(rev)``.
336 ``store.rawdata(rev)``.
337
337
338 If not defined, ``len(store.rawdata(rev))`` will be called.
338 If not defined, ``len(store.rawdata(rev))`` will be called.
339
339
340 ``revdifffn`` (optional)
340 ``revdifffn`` (optional)
341 Callable receiving a pair of revision numbers that returns a delta
341 Callable receiving a pair of revision numbers that returns a delta
342 between them.
342 between them.
343
343
344 If not defined, a delta will be computed by invoking mdiff code
344 If not defined, a delta will be computed by invoking mdiff code
345 on ``store.revision()`` results.
345 on ``store.revision()`` results.
346
346
347 Defining this function allows a precomputed or stored delta to be
347 Defining this function allows a precomputed or stored delta to be
348 used without having to compute one.
348 used without having to compute one.
349
349
350 ``flagsfn`` (optional)
350 ``flagsfn`` (optional)
351 Callable receiving a revision number and returns the integer flags
351 Callable receiving a revision number and returns the integer flags
352 value for it. If not defined, flags value will be 0.
352 value for it. If not defined, flags value will be 0.
353
353
354 ``deltamode``
354 ``deltamode``
355 constraint on the delta to be sent:
355 constraint on the delta to be sent:
356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
357 * CG_DELTAMODE_PREV - only delta against "prev",
357 * CG_DELTAMODE_PREV - only delta against "prev",
358 * CG_DELTAMODE_FULL - only issue full snapshot.
358 * CG_DELTAMODE_FULL - only issue full snapshot.
359
359
360 Whether to send fulltext revisions instead of deltas, if allowed.
360 Whether to send fulltext revisions instead of deltas, if allowed.
361
361
362 ``nodesorder``
362 ``nodesorder``
363 ``revisiondata``
363 ``revisiondata``
364 ``assumehaveparentrevisions``
364 ``assumehaveparentrevisions``
365 ``sidedata_helpers`` (optional)
365 ``sidedata_helpers`` (optional)
366 If not None, sidedata should be included.
366 If not None, sidedata should be included.
367 A dictionary of revlog type to tuples of `(repo, computers, removers)`:
367 See `revlogutil.sidedata.get_sidedata_helpers`.
368 * `repo` is used as an argument for computers
369 * `computers` is a list of `(category, (keys, computer, flags)` that
370 compute the missing sidedata categories that were asked:
371 * `category` is the sidedata category
372 * `keys` are the sidedata keys to be affected
373 * `flags` is a bitmask (an integer) of flags to remove when
374 removing the category.
375 * `computer` is the function `(repo, store, rev, sidedata)` that
376 returns a tuple of
377 `(new sidedata dict, (flags to add, flags to remove))`.
378 For example, it will return `({}, (0, 1 << 15))` to return no
379 sidedata, with no flags to add and one flag to remove.
380 * `removers` will remove the keys corresponding to the categories
381 that are present, but not needed.
382 If both `computers` and `removers` are empty, sidedata are simply not
383 transformed.
384 Revlog types are `changelog`, `manifest` or `filelog`.
385 """
368 """

    fnode = store.node
    frev = store.rev

    if nodesorder == b'nodes':
        revs = [frev(n) for n in nodes]
    elif nodesorder == b'linear':
        revs = {frev(n) for n in nodes}
        revs = dagop.linearize(revs, store.parentrevs)
    else:  # storage and default
        revs = sorted(frev(n) for n in nodes)

    prevrev = None

    if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
        prevrev = store.parentrevs(revs[0])[0]

    # Set of revs available to delta against.
    available = set()

    for rev in revs:
        if rev == nullrev:
            continue

        node = fnode(rev)
        p1rev, p2rev = store.parentrevs(rev)

        if deltaparentfn:
            deltaparentrev = deltaparentfn(rev)
        else:
            deltaparentrev = nullrev

        # Forced delta against previous mode.
        if deltamode == repository.CG_DELTAMODE_PREV:
            baserev = prevrev

        # We're instructed to send fulltext. Honor that.
        elif deltamode == repository.CG_DELTAMODE_FULL:
            baserev = nullrev
        # We're instructed to use p1. Honor that
        elif deltamode == repository.CG_DELTAMODE_P1:
            baserev = p1rev

        # There is a delta in storage. We try to use that because it
        # amounts to effectively copying data from storage and is
        # therefore the fastest.
        elif deltaparentrev != nullrev:
            # Base revision was already emitted in this group. We can
            # always safely use the delta.
            if deltaparentrev in available:
                baserev = deltaparentrev

            # Base revision is a parent that hasn't been emitted already.
            # Use it if we can assume the receiver has the parent revision.
            elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
                baserev = deltaparentrev

            # No guarantee the receiver has the delta parent. Send delta
            # against last revision (if possible), which in the common case
            # should be similar enough to this revision that the delta is
            # reasonable.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

        # Storage has a fulltext revision.

        # Let's use the previous revision, which is as good a guess as any.
        # There is definitely room to improve this logic.
        elif prevrev is not None:
            baserev = prevrev
        else:
            baserev = nullrev

        # But we can't actually use our chosen delta base for whatever
        # reason. Reset to fulltext.
        if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
            baserev = nullrev

        revision = None
        delta = None
        baserevisionsize = None

        if revisiondata:
            if store.iscensored(baserev) or store.iscensored(rev):
                try:
                    revision = store.rawdata(node)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    if rawsizefn:
                        baserevisionsize = rawsizefn(baserev)
                    else:
                        baserevisionsize = len(store.rawdata(baserev))

            elif (
                baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
            ):
                revision = store.rawdata(node)
                available.add(rev)
            else:
                if revdifffn:
                    delta = revdifffn(baserev, rev)
                else:
                    delta = mdiff.textdiff(
                        store.rawdata(baserev), store.rawdata(rev)
                    )

                available.add(rev)

        serialized_sidedata = None
        sidedata_flags = (0, 0)
        if sidedata_helpers:
            old_sidedata = store.sidedata(rev)
            sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
                store=store,
                sidedata_helpers=sidedata_helpers,
                sidedata=old_sidedata,
                rev=rev,
            )
            if sidedata:
                serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)

        flags = flagsfn(rev) if flagsfn else 0
        protocol_flags = 0
        if serialized_sidedata:
            # Advertise that sidedata exists to the other side
            protocol_flags |= CG_FLAG_SIDEDATA
            # Computers and removers can return flags to add and/or remove
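            # Note that ``&`` binds tighter than ``|``, so this evaluates as
            # ``flags | (sidedata_flags[0] & ~sidedata_flags[1])``: the
            # removal mask only clears bits in the flags being added here.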
            flags = flags | sidedata_flags[0] & ~sidedata_flags[1]

        yield resultcls(
            node=node,
            p1node=fnode(p1rev),
            p2node=fnode(p2rev),
            basenode=fnode(baserev),
            flags=flags,
            baserevisionsize=baserevisionsize,
            revision=revision,
            delta=delta,
            sidedata=serialized_sidedata,
            protocol_flags=protocol_flags,
        )

        prevrev = rev

def deltaiscensored(delta, baserev, baselenfn):
    """Determine if a delta represents censored revision data.

    ``baserev`` is the base revision this delta is encoded against.
    ``baselenfn`` is a callable receiving a revision number that resolves the
    length of the revision fulltext.

    Returns a bool indicating if the result of the delta represents a censored
    revision.
    """
    # Fragile heuristic: unless new file meta keys are added alphabetically
    # preceding "censored", all censored revisions are prefixed by
    # "\1\ncensored:". A delta producing such a censored revision must be a
    # full-replacement delta, so we inspect the first and only patch in the
    # delta for this prefix.
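    # For example (illustrative tombstone), a censored fulltext begins with
    # a metadata block like b"\1\ncensored:<tombstone>\n\1\n", so the first
    # patch of a full-replacement delta starts with b"\1\ncensored:".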
    hlen = struct.calcsize(b">lll")
    if len(delta) <= hlen:
        return False

    oldlen = baselenfn(baserev)
    newlen = len(delta) - hlen
    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
        return False

    add = b"\1\ncensored:"
    addlen = len(add)
    return newlen >= addlen and delta[hlen : hlen + addlen] == add
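

# A minimal usage sketch, assuming ``delta`` is a raw delta chunk encoded
# against ``baserev`` and ``store`` is a revlog-like object exposing a
# ``rawsize(rev)`` method (names here are illustrative, not from this module):
#
#   if deltaiscensored(delta, baserev, store.rawsize):
#       ...  # handle the censored revision instead of applying the delta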