sidedata-exchange: rewrite sidedata on-the-fly whenever possible...
Raphaël Gomès
r47452:ba8e508a default
@@ -0,0 +1,473 b''
1 ===========================
2 Tests for sidedata exchange
3 ===========================
4
5 Check simple exchange behavior
6 ==============================
7
8 Pusher and pushed have sidedata enabled
9 ---------------------------------------
10
11 $ hg init sidedata-source --config format.exp-use-side-data=yes
12 $ cat << EOF >> sidedata-source/.hg/hgrc
13 > [extensions]
14 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
15 > EOF
16 $ hg init sidedata-target --config format.exp-use-side-data=yes
17 $ cat << EOF >> sidedata-target/.hg/hgrc
18 > [extensions]
19 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
20 > EOF
21 $ cd sidedata-source
22 $ echo a > a
23 $ echo b > b
24 $ echo c > c
25 $ hg commit -Am "initial"
26 adding a
27 adding b
28 adding c
29 $ echo aa > a
30 $ hg commit -m "other"
31 $ hg push -r . ../sidedata-target
32 pushing to ../sidedata-target
33 searching for changes
34 adding changesets
35 adding manifests
36 adding file changes
37 added 2 changesets with 4 changes to 3 files
38 $ hg -R ../sidedata-target debugsidedata -c 0
39 2 sidedata entries
40 entry-0001 size 4
41 entry-0002 size 32
42 $ hg -R ../sidedata-target debugsidedata -c 1 -v
43 2 sidedata entries
44 entry-0001 size 4
45 '\x00\x00\x00:'
46 entry-0002 size 32
47 '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
48 $ hg -R ../sidedata-target debugsidedata -m 0
49 2 sidedata entries
50 entry-0001 size 4
51 entry-0002 size 32
52 $ hg -R ../sidedata-target debugsidedata -m 1 -v
53 2 sidedata entries
54 entry-0001 size 4
55 '\x00\x00\x00\x81'
56 entry-0002 size 32
57 '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
58 $ hg -R ../sidedata-target debugsidedata a 0
59 2 sidedata entries
60 entry-0001 size 4
61 entry-0002 size 32
62 $ hg -R ../sidedata-target debugsidedata a 1 -v
63 2 sidedata entries
64 entry-0001 size 4
65 '\x00\x00\x00\x03'
66 entry-0002 size 32
67 '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
68 $ cd ..
69
70 Puller and pulled have sidedata enabled
71 ---------------------------------------
72
73 $ rm -rf sidedata-source sidedata-target
74 $ hg init sidedata-source --config format.exp-use-side-data=yes
75 $ cat << EOF >> sidedata-source/.hg/hgrc
76 > [extensions]
77 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
78 > EOF
79 $ hg init sidedata-target --config format.exp-use-side-data=yes
80 $ cat << EOF >> sidedata-target/.hg/hgrc
81 > [extensions]
82 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
83 > EOF
84 $ cd sidedata-source
85 $ echo a > a
86 $ echo b > b
87 $ echo c > c
88 $ hg commit -Am "initial"
89 adding a
90 adding b
91 adding c
92 $ echo aa > a
93 $ hg commit -m "other"
94 $ hg pull -R ../sidedata-target ../sidedata-source
95 pulling from ../sidedata-source
96 requesting all changes
97 adding changesets
98 adding manifests
99 adding file changes
100 added 2 changesets with 4 changes to 3 files
101 new changesets 05da661850d7:7ec8b4049447
102 (run 'hg update' to get a working copy)
103 $ hg -R ../sidedata-target debugsidedata -c 0
104 2 sidedata entries
105 entry-0001 size 4
106 entry-0002 size 32
107 $ hg -R ../sidedata-target debugsidedata -c 1 -v
108 2 sidedata entries
109 entry-0001 size 4
110 '\x00\x00\x00:'
111 entry-0002 size 32
112 '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
113 $ hg -R ../sidedata-target debugsidedata -m 0
114 2 sidedata entries
115 entry-0001 size 4
116 entry-0002 size 32
117 $ hg -R ../sidedata-target debugsidedata -m 1 -v
118 2 sidedata entries
119 entry-0001 size 4
120 '\x00\x00\x00\x81'
121 entry-0002 size 32
122 '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
123 $ hg -R ../sidedata-target debugsidedata a 0
124 2 sidedata entries
125 entry-0001 size 4
126 entry-0002 size 32
127 $ hg -R ../sidedata-target debugsidedata a 1 -v
128 2 sidedata entries
129 entry-0001 size 4
130 '\x00\x00\x00\x03'
131 entry-0002 size 32
132 '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
133 $ cd ..
134
135 Now on to asymmetric configs.
136
137 Pusher has sidedata enabled, pushed does not
138 --------------------------------------------
139
140 $ rm -rf sidedata-source sidedata-target
141 $ hg init sidedata-source --config format.exp-use-side-data=yes
142 $ cat << EOF >> sidedata-source/.hg/hgrc
143 > [extensions]
144 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
145 > EOF
146 $ hg init sidedata-target --config format.exp-use-side-data=no
147 $ cd sidedata-source
148 $ echo a > a
149 $ echo b > b
150 $ echo c > c
151 $ hg commit -Am "initial"
152 adding a
153 adding b
154 adding c
155 $ echo aa > a
156 $ hg commit -m "other"
157 $ hg push -r . ../sidedata-target --traceback
158 pushing to ../sidedata-target
159 searching for changes
160 adding changesets
161 adding manifests
162 adding file changes
163 added 2 changesets with 4 changes to 3 files
164 $ hg -R ../sidedata-target log -G
165 o changeset: 1:7ec8b4049447
166 | tag: tip
167 | user: test
168 | date: Thu Jan 01 00:00:00 1970 +0000
169 | summary: other
170 |
171 o changeset: 0:05da661850d7
172 user: test
173 date: Thu Jan 01 00:00:00 1970 +0000
174 summary: initial
175
176
177 $ hg -R ../sidedata-target debugsidedata -c 0
178 $ hg -R ../sidedata-target debugsidedata -c 1 -v
179 $ hg -R ../sidedata-target debugsidedata -m 0
180 $ hg -R ../sidedata-target debugsidedata -m 1 -v
181 $ hg -R ../sidedata-target debugsidedata a 0
182 $ hg -R ../sidedata-target debugsidedata a 1 -v
183 $ cd ..
184
185 Pulled has sidedata enabled, puller does not
186 --------------------------------------------
187
188 $ rm -rf sidedata-source sidedata-target
189 $ hg init sidedata-source --config format.exp-use-side-data=yes
190 $ cat << EOF >> sidedata-source/.hg/hgrc
191 > [extensions]
192 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
193 > EOF
194 $ hg init sidedata-target --config format.exp-use-side-data=no
195 $ cd sidedata-source
196 $ echo a > a
197 $ echo b > b
198 $ echo c > c
199 $ hg commit -Am "initial"
200 adding a
201 adding b
202 adding c
203 $ echo aa > a
204 $ hg commit -m "other"
205 $ hg pull -R ../sidedata-target ../sidedata-source
206 pulling from ../sidedata-source
207 requesting all changes
208 adding changesets
209 adding manifests
210 adding file changes
211 added 2 changesets with 4 changes to 3 files
212 new changesets 05da661850d7:7ec8b4049447
213 (run 'hg update' to get a working copy)
214 $ hg -R ../sidedata-target log -G
215 o changeset: 1:7ec8b4049447
216 | tag: tip
217 | user: test
218 | date: Thu Jan 01 00:00:00 1970 +0000
219 | summary: other
220 |
221 o changeset: 0:05da661850d7
222 user: test
223 date: Thu Jan 01 00:00:00 1970 +0000
224 summary: initial
225
226
227 $ hg -R ../sidedata-target debugsidedata -c 0
228 $ hg -R ../sidedata-target debugsidedata -c 1 -v
229 $ hg -R ../sidedata-target debugsidedata -m 0
230 $ hg -R ../sidedata-target debugsidedata -m 1 -v
231 $ hg -R ../sidedata-target debugsidedata a 0
232 $ hg -R ../sidedata-target debugsidedata a 1 -v
233 $ cd ..
234
235
236 Check sidedata exchange with on-the-fly generation and removal
237 ==============================================================
238
239 (Push) Target has strict superset of the source
240 -----------------------------------------------
241
242 $ hg init source-repo --config format.exp-use-side-data=yes
243 $ hg init target-repo --config format.exp-use-side-data=yes
244 $ cat << EOF >> target-repo/.hg/hgrc
245 > [extensions]
246 > testsidedata=$TESTDIR/testlib/ext-sidedata.py
247 > EOF
248 $ cd source-repo
249 $ echo aaa > a
250 $ hg add a
251 $ hg commit -m a
252 $ echo aaa > b
253 $ hg add b
254 $ hg commit -m b
255 $ echo xxx >> a
256 $ hg commit -m aa
257
258 No sidedata is generated in the source
259 $ hg debugsidedata -c 0
260
261 Check that sidedata capabilities are advertised
262 $ hg debugcapabilities ../target-repo | grep sidedata
263 exp-wanted-sidedata=1,2
264
265 We expect the client to abort the push since it's not capable of generating
266 what the server is asking for (a sketch of this check follows the test)
267 $ hg push -r . ../target-repo
268 pushing to ../target-repo
269 abort: cannot push: required sidedata category not supported by this client: '1'
270 [255]
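A minimal sketch of the check behind this abort (plain Python; the names and
control flow are illustrative, not Mercurial's actual internals):

    # hypothetical sketch: refuse to push when the remote wants a sidedata
    # category this client has no computer for
    remote_wanted = set("1,2".split(","))  # parsed from exp-wanted-sidedata=1,2
    local_supported = {"2"}                # categories this client can compute
    missing = sorted(remote_wanted - local_supported)
    if missing:
        raise SystemExit(
            "cannot push: required sidedata category not supported "
            "by this client: '%s'" % "', '".join(missing)
        )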
271
272 Add the required capabilities
273 $ cat << EOF >> .hg/hgrc
274 > [extensions]
275 > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
276 > EOF
277
278 We expect the target to have sidedata that was generated by the source on push
279 $ hg push -r . ../target-repo
280 pushing to ../target-repo
281 searching for changes
282 adding changesets
283 adding manifests
284 adding file changes
285 added 3 changesets with 3 changes to 2 files
286 $ cd ../target-repo
287 $ hg debugsidedata -c 0
288 2 sidedata entries
289 entry-0001 size 4
290 entry-0002 size 32
291 $ hg debugsidedata -c 1 -v
292 2 sidedata entries
293 entry-0001 size 4
294 '\x00\x00\x006'
295 entry-0002 size 32
296 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
297 $ hg debugsidedata -m 2
298 2 sidedata entries
299 entry-0001 size 4
300 entry-0002 size 32
301 $ hg debugsidedata a 1
302 2 sidedata entries
303 entry-0001 size 4
304 entry-0002 size 32
305 $ cd ..
306
307 (Push) Difference is not subset/superset
308 ----------------------------------------
309
310 Compared to the target, the source has one sidedata category in common
311 (entry-0002), is missing one (entry-0003) and has one extra (entry-0001).
312
313 $ rm -rf source-repo target-repo
314 $ hg init source-repo --config format.exp-use-side-data=yes
315 $ cat << EOF >> source-repo/.hg/hgrc
316 > [extensions]
317 > testsidedata3=$TESTDIR/testlib/ext-sidedata-3.py
318 > EOF
319 $ hg init target-repo --config format.exp-use-side-data=yes
320 $ cat << EOF >> target-repo/.hg/hgrc
321 > [extensions]
322 > testsidedata4=$TESTDIR/testlib/ext-sidedata-4.py
323 > EOF
324 $ cd source-repo
325 $ echo aaa > a
326 $ hg add a
327 $ hg commit -m a
328 $ echo aaa > b
329 $ hg add b
330 $ hg commit -m b
331 $ echo xxx >> a
332 $ hg commit -m aa
333
334 Check that sidedata capabilities are advertised
335 $ hg debugcapabilities . | grep sidedata
336 exp-wanted-sidedata=1,2
337 $ hg debugcapabilities ../target-repo | grep sidedata
338 exp-wanted-sidedata=2,3
339
340 Sidedata is generated in the source, but only the right categories (entry-0001 and entry-0002)
341 $ hg debugsidedata -c 0
342 2 sidedata entries
343 entry-0001 size 4
344 entry-0002 size 32
345 $ hg debugsidedata -c 1 -v
346 2 sidedata entries
347 entry-0001 size 4
348 '\x00\x00\x006'
349 entry-0002 size 32
350 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
351 $ hg debugsidedata -m 2
352 2 sidedata entries
353 entry-0001 size 4
354 entry-0002 size 32
355 $ hg debugsidedata a 1
356 2 sidedata entries
357 entry-0001 size 4
358 entry-0002 size 32
359
360
361 We expect the target to have the sidedata that the source generated on push,
362 with the categories the target does not support removed along the way.
363 Namely, we expect entry-0002 (exchanged as-is) and entry-0003 (generated),
364 but not entry-0001.
365
366 $ hg push -r . ../target-repo --traceback
367 pushing to ../target-repo
368 searching for changes
369 adding changesets
370 adding manifests
371 adding file changes
372 added 3 changesets with 3 changes to 2 files
373 $ cd ../target-repo
374 $ hg log -G
375 o changeset: 2:40f977031323
376 | tag: tip
377 | user: test
378 | date: Thu Jan 01 00:00:00 1970 +0000
379 | summary: aa
380 |
381 o changeset: 1:2707720c6597
382 | user: test
383 | date: Thu Jan 01 00:00:00 1970 +0000
384 | summary: b
385 |
386 o changeset: 0:7049e48789d7
387 user: test
388 date: Thu Jan 01 00:00:00 1970 +0000
389 summary: a
390
391 $ hg debugsidedata -c 0
392 2 sidedata entries
393 entry-0002 size 32
394 entry-0003 size 48
395 $ hg debugsidedata -c 1 -v
396 2 sidedata entries
397 entry-0002 size 32
398 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
399 entry-0003 size 48
400 '\x87\xcf\xdfI/\xb5\xed\xeaC\xc1\xf0S\xf3X\x1c\xcc\x00m\xee\xe6#\xc1\xe3\xcaB8Fk\x82e\xfc\xc01\xf6\xb7\xb9\xb3([\xf6D\xa6\xcf\x9b\xea\x11{\x08'
401 $ hg debugsidedata -m 2
402 2 sidedata entries
403 entry-0002 size 32
404 entry-0003 size 48
405 $ hg debugsidedata a 1
406 2 sidedata entries
407 entry-0002 size 32
408 entry-0003 size 48
409 $ cd ..
410
411 (Pull) Target has strict superset of the source
412 -----------------------------------------------
413
414 $ rm -rf source-repo target-repo
415 $ hg init source-repo --config format.exp-use-side-data=yes
416 $ hg init target-repo --config format.exp-use-side-data=yes
417 $ cat << EOF >> target-repo/.hg/hgrc
418 > [extensions]
419 > testsidedata=$TESTDIR/testlib/ext-sidedata.py
420 > EOF
421 $ cd source-repo
422 $ echo aaa > a
423 $ hg add a
424 $ hg commit -m a
425 $ echo aaa > b
426 $ hg add b
427 $ hg commit -m b
428 $ echo xxx >> a
429 $ hg commit -m aa
430
431 No sidedata is generated in the source
432 $ hg debugsidedata -c 0
433
434 Check that sidedata capabilities are advertised
435 $ hg debugcapabilities ../target-repo | grep sidedata
436 exp-wanted-sidedata=1,2
437
438 $ cd ../target-repo
439
440 Add the required capabilities
441 $ cat << EOF >> .hg/hgrc
442 > [extensions]
443 > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
444 > EOF
445
446 We expect the target to have sidedata that it generated on-the-fly during pull
447 $ hg pull -r . ../source-repo --traceback
448 pulling from ../source-repo
449 adding changesets
450 adding manifests
451 adding file changes
452 added 3 changesets with 3 changes to 2 files
453 new changesets 7049e48789d7:40f977031323
454 (run 'hg update' to get a working copy)
455 $ hg debugsidedata -c 0 --traceback
456 2 sidedata entries
457 entry-0001 size 4
458 entry-0002 size 32
459 $ hg debugsidedata -c 1 -v --traceback
460 2 sidedata entries
461 entry-0001 size 4
462 '\x00\x00\x006'
463 entry-0002 size 32
464 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
465 $ hg debugsidedata -m 2
466 2 sidedata entries
467 entry-0001 size 4
468 entry-0002 size 32
469 $ hg debugsidedata a 1
470 2 sidedata entries
471 entry-0001 size 4
472 entry-0002 size 32
473 $ cd ..
@@ -0,0 +1,81 b''
1 # coding: utf8
2 # ext-sidedata-5.py - small extension to test (differently still) the sidedata
3 # logic
4 #
5 # Simulates a server for a simple sidedata exchange.
6 #
7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 #
9 # This software may be used and distributed according to the terms of the
10 # GNU General Public License version 2 or any later version.
11
12 from __future__ import absolute_import
13
14 import hashlib
15 import struct
16
17 from mercurial import (
18 extensions,
19 revlog,
20 )
21
22
23 from mercurial.revlogutils import sidedata as sidedatamod
24
25
26 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
27 sidedata = sidedata.copy()
28 if text is None:
29 text = revlog.revision(rev)
30 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
31 return sidedata
32
33
34 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
35 sidedata = sidedata.copy()
36 if text is None:
37 text = revlog.revision(rev)
38 sha256 = hashlib.sha256(text).digest()
39 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
40 return sidedata
41
42
43 def reposetup(ui, repo):
44 # Sidedata keys happen to be the same as the categories, easier for testing.
45 for kind in (b'changelog', b'manifest', b'filelog'):
46 repo.register_sidedata_computer(
47 kind,
48 sidedatamod.SD_TEST1,
49 (sidedatamod.SD_TEST1,),
50 compute_sidedata_1,
51 )
52 repo.register_sidedata_computer(
53 kind,
54 sidedatamod.SD_TEST2,
55 (sidedatamod.SD_TEST2,),
56 compute_sidedata_2,
57 )
58
59 # Advertise the sidedata categories this repository wants to receive on
60 # exchange (the computers registered above generate them on-the-fly)
61 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
62 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
63
64
65 def wrapaddrevision(
66 orig, self, text, transaction, link, p1, p2, *args, **kwargs
67 ):
68 if kwargs.get('sidedata') is None:
69 kwargs['sidedata'] = {}
70 sd = kwargs['sidedata']
71 ## let's store some arbitrary data just for testing
72 # text length
73 sd[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
74 # and sha2 hashes
75 sha256 = hashlib.sha256(text).digest()
76 sd[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
77 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
78
79
80 def extsetup(ui):
81 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
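The entry sizes and byte strings printed by `debugsidedata` in the tests
above follow directly from these two computers: entry-0001 is the revision
text length packed as a big-endian 32-bit integer, and entry-0002 is the raw
32-byte SHA-256 digest of that same text. A quick decoding check (plain
Python, independent of Mercurial):

    import struct
    raw = b'\x00\x00\x00:'  # entry-0001 shown for changelog rev 1 above
    assert struct.unpack('>I', raw) == (58,)  # that revision's text is 58 bytes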
@@ -1,1861 +1,1930 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import collections
10 11 import os
11 12 import struct
12 13 import weakref
13 14
14 15 from .i18n import _
15 16 from .node import (
16 17 hex,
17 18 nullid,
18 19 nullrev,
19 20 short,
20 21 )
21 22 from .pycompat import open
22 23
23 24 from . import (
24 25 error,
25 26 match as matchmod,
26 27 mdiff,
27 28 phases,
28 29 pycompat,
29 30 requirements,
30 31 scmutil,
31 32 util,
32 33 )
33 34
34 35 from .interfaces import repository
35 36 from .revlogutils import sidedata as sidedatamod
36 37
37 38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40 41
41 42 LFS_REQUIREMENT = b'lfs'
42 43
43 44 readexactly = util.readexactly
44 45
45 46
46 47 def getchunk(stream):
47 48 """return the next chunk from stream as a string"""
48 49 d = readexactly(stream, 4)
49 50 l = struct.unpack(b">l", d)[0]
50 51 if l <= 4:
51 52 if l:
52 53 raise error.Abort(_(b"invalid chunk length %d") % l)
53 54 return b""
54 55 return readexactly(stream, l - 4)
55 56
56 57
57 58 def chunkheader(length):
58 59 """return a changegroup chunk header (string)"""
59 60 return struct.pack(b">l", length + 4)
60 61
61 62
62 63 def closechunk():
63 64 """return a changegroup chunk header (string) for a zero-length chunk"""
64 65 return struct.pack(b">l", 0)
65 66
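# A worked example of this framing: the 4-byte big-endian length field
# counts itself, so closechunk() yields b'\x00\x00\x00\x00' and a 3-byte
# payload is framed as chunkheader(3) == b'\x00\x00\x00\x07' followed by
# the three payload bytes; getchunk() treats any length <= 4 as the end
# of the current sequence of chunks.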
66 67
67 68 def _fileheader(path):
68 69 """Obtain a changegroup chunk header for a named path."""
69 70 return chunkheader(len(path)) + path
70 71
71 72
72 73 def writechunks(ui, chunks, filename, vfs=None):
73 74 """Write chunks to a file and return its filename.
74 75
75 76 The stream is assumed to be a bundle file.
76 77 Existing files will not be overwritten.
77 78 If no filename is specified, a temporary file is created.
78 79 """
79 80 fh = None
80 81 cleanup = None
81 82 try:
82 83 if filename:
83 84 if vfs:
84 85 fh = vfs.open(filename, b"wb")
85 86 else:
86 87 # Increase default buffer size because default is usually
87 88 # small (4k is common on Linux).
88 89 fh = open(filename, b"wb", 131072)
89 90 else:
90 91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 92 fh = os.fdopen(fd, "wb")
92 93 cleanup = filename
93 94 for c in chunks:
94 95 fh.write(c)
95 96 cleanup = None
96 97 return filename
97 98 finally:
98 99 if fh is not None:
99 100 fh.close()
100 101 if cleanup is not None:
101 102 if filename and vfs:
102 103 vfs.unlink(cleanup)
103 104 else:
104 105 os.unlink(cleanup)
105 106
106 107
107 108 class cg1unpacker(object):
108 109 """Unpacker for cg1 changegroup streams.
109 110
110 111 A changegroup unpacker handles the framing of the revision data in
111 112 the wire format. Most consumers will want to use the apply()
112 113 method to add the changes from the changegroup to a repository.
113 114
114 115 If you're forwarding a changegroup unmodified to another consumer,
115 116 use getchunks(), which returns an iterator of changegroup
116 117 chunks. This is mostly useful for cases where you need to know the
117 118 data stream has ended by observing the end of the changegroup.
118 119
119 120 deltachunk() is useful only if you're applying delta data. Most
120 121 consumers should prefer apply() instead.
121 122
122 123 A few other public methods exist. Those are used only for
123 124 bundlerepo and some debug commands - their use is discouraged.
124 125 """
125 126
126 127 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 128 deltaheadersize = deltaheader.size
128 129 version = b'01'
129 130 _grouplistcount = 1 # One list of files after the manifests
130 131
131 132 def __init__(self, fh, alg, extras=None):
132 133 if alg is None:
133 134 alg = b'UN'
134 135 if alg not in util.compengines.supportedbundletypes:
135 136 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 137 if alg == b'BZ':
137 138 alg = b'_truncatedBZ'
138 139
139 140 compengine = util.compengines.forbundletype(alg)
140 141 self._stream = compengine.decompressorreader(fh)
141 142 self._type = alg
142 143 self.extras = extras or {}
143 144 self.callback = None
144 145
145 146 # These methods (compressed, read, seek, tell) all appear to only
146 147 # be used by bundlerepo, but it's a little hard to tell.
147 148 def compressed(self):
148 149 return self._type is not None and self._type != b'UN'
149 150
150 151 def read(self, l):
151 152 return self._stream.read(l)
152 153
153 154 def seek(self, pos):
154 155 return self._stream.seek(pos)
155 156
156 157 def tell(self):
157 158 return self._stream.tell()
158 159
159 160 def close(self):
160 161 return self._stream.close()
161 162
162 163 def _chunklength(self):
163 164 d = readexactly(self._stream, 4)
164 165 l = struct.unpack(b">l", d)[0]
165 166 if l <= 4:
166 167 if l:
167 168 raise error.Abort(_(b"invalid chunk length %d") % l)
168 169 return 0
169 170 if self.callback:
170 171 self.callback()
171 172 return l - 4
172 173
173 174 def changelogheader(self):
174 175 """v10 does not have a changelog header chunk"""
175 176 return {}
176 177
177 178 def manifestheader(self):
178 179 """v10 does not have a manifest header chunk"""
179 180 return {}
180 181
181 182 def filelogheader(self):
182 183 """return the header of the filelogs chunk, v10 only has the filename"""
183 184 l = self._chunklength()
184 185 if not l:
185 186 return {}
186 187 fname = readexactly(self._stream, l)
187 188 return {b'filename': fname}
188 189
189 190 def _deltaheader(self, headertuple, prevnode):
190 191 node, p1, p2, cs = headertuple
191 192 if prevnode is None:
192 193 deltabase = p1
193 194 else:
194 195 deltabase = prevnode
195 196 flags = 0
196 197 return node, p1, p2, deltabase, cs, flags
197 198
198 199 def deltachunk(self, prevnode):
199 200 l = self._chunklength()
200 201 if not l:
201 202 return {}
202 203 headerdata = readexactly(self._stream, self.deltaheadersize)
203 204 header = self.deltaheader.unpack(headerdata)
204 205 delta = readexactly(self._stream, l - self.deltaheadersize)
205 206 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 207 # cg4 forward-compat
207 208 sidedata = {}
208 209 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209 210
210 211 def getchunks(self):
211 212 returns all the chunks contained in the bundle
212 213
213 214 Used when you need to forward the binary stream to a file or another
214 215 network API. To do so, it parses the changegroup data; otherwise it
215 216 would block with sshrepo, because it doesn't know the end of the stream.
216 217 """
217 218 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 219 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 220 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 221 # filelogs.
221 222 #
222 223 # Changelog and manifestlog parts are terminated with empty chunks. The
223 224 # tree and file parts are a list of entry sections. Each entry section
224 225 # is a series of chunks terminating in an empty chunk. The list of these
225 226 # entry sections is terminated in yet another empty chunk, so we know
226 227 # we've reached the end of the tree/file list when we reach an empty
227 228 # chunk that was preceded by no non-empty chunks.
228 229
229 230 parts = 0
230 231 while parts < 2 + self._grouplistcount:
231 232 noentries = True
232 233 while True:
233 234 chunk = getchunk(self)
234 235 if not chunk:
235 236 # The first two empty chunks represent the end of the
236 237 # changelog and the manifestlog portions. The remaining
237 238 # empty chunks represent either A) the end of individual
238 239 # tree or file entries in the file list, or B) the end of
239 240 # the entire list. It's the end of the entire list if there
240 241 # were no entries (i.e. noentries is True).
241 242 if parts < 2:
242 243 parts += 1
243 244 elif noentries:
244 245 parts += 1
245 246 break
246 247 noentries = False
247 248 yield chunkheader(len(chunk))
248 249 pos = 0
249 250 while pos < len(chunk):
250 251 next = pos + 2 ** 20
251 252 yield chunk[pos:next]
252 253 pos = next
253 254 yield closechunk()
254 255
255 def _unpackmanifests(self, repo, revmap, trp, prog):
256 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
256 257 self.callback = prog.increment
257 258 # no need to check for empty manifest group here:
258 259 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 260 # no new manifest will be created and the manifest group will
260 261 # be empty during the pull
261 262 self.manifestheader()
262 263 deltas = self.deltaiter()
263 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
264 storage = repo.manifestlog.getstorage(b'')
265 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
264 266 prog.complete()
265 267 self.callback = None
266 268
267 269 def apply(
268 270 self,
269 271 repo,
270 272 tr,
271 273 srctype,
272 274 url,
273 275 targetphase=phases.draft,
274 276 expectedtotal=None,
275 277 sidedata_categories=None,
276 278 ):
277 279 """Add the changegroup returned by source.read() to this repo.
278 280 srctype is a string like 'push', 'pull', or 'unbundle'. url is
279 281 the URL of the repo where this changegroup is coming from.
280 282
281 283 Return an integer summarizing the change to this repo:
282 284 - nothing changed or no source: 0
283 285 - more heads than before: 1+added heads (2..n)
284 286 - fewer heads than before: -1-removed heads (-2..-n)
285 287 - number of heads stays the same: 1
286 288
287 289 `sidedata_categories` is an optional set of the remote's sidedata wanted
288 290 categories.
289 291 """
290 292 repo = repo.unfiltered()
291 293
292 294 # Only useful if we're adding sidedata categories. If both peers have
293 295 # the same categories, then we simply don't do anything.
294 296 if self.version == b'04' and srctype == b'pull':
295 297 sidedata_helpers = get_sidedata_helpers(
296 298 repo,
297 299 sidedata_categories or set(),
298 300 pull=True,
299 301 )
300 302 else:
301 303 sidedata_helpers = None
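        # As used later in this function, `sidedata_helpers[1]` maps a revlog
        # kind (b'changelog', b'manifest' or b'filelog') to that kind's
        # helpers; see `storageutil.emitrevisions` for the full documentation.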
302 304
303 305 def csmap(x):
304 306 repo.ui.debug(b"add changeset %s\n" % short(x))
305 307 return len(cl)
306 308
307 309 def revmap(x):
308 310 return cl.rev(x)
309 311
310 312 try:
311 313 # The transaction may already carry source information. In this
312 314 # case we use the top level data. We overwrite the argument
313 315 # because we need to use the top level value (if they exist)
314 316 # in this function.
315 317 srctype = tr.hookargs.setdefault(b'source', srctype)
316 318 tr.hookargs.setdefault(b'url', url)
317 319 repo.hook(
318 320 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
319 321 )
320 322
321 323 # write changelog data to temp files so concurrent readers
322 324 # will not see an inconsistent view
323 325 cl = repo.changelog
324 326 cl.delayupdate(tr)
325 327 oldheads = set(cl.heads())
326 328
327 329 trp = weakref.proxy(tr)
328 330 # pull off the changeset group
329 331 repo.ui.status(_(b"adding changesets\n"))
330 332 clstart = len(cl)
331 333 progress = repo.ui.makeprogress(
332 334 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
333 335 )
334 336 self.callback = progress.increment
335 337
336 338 efilesset = set()
337 339 duprevs = []
338 340
339 341 def ondupchangelog(cl, rev):
340 342 if rev < clstart:
341 343 duprevs.append(rev)
342 344
343 345 def onchangelog(cl, rev):
344 346 ctx = cl.changelogrevision(rev)
345 347 efilesset.update(ctx.files)
346 348 repo.register_changeset(rev, ctx)
347 349
348 350 self.changelogheader()
349 351 deltas = self.deltaiter()
350 352 if not cl.addgroup(
351 353 deltas,
352 354 csmap,
353 355 trp,
354 356 alwayscache=True,
355 357 addrevisioncb=onchangelog,
356 358 duplicaterevisioncb=ondupchangelog,
357 359 ):
358 360 repo.ui.develwarn(
359 361 b'applied empty changelog from changegroup',
360 362 config=b'warn-empty-changegroup',
361 363 )
362 364 efiles = len(efilesset)
363 365 clend = len(cl)
364 366 changesets = clend - clstart
365 367 progress.complete()
366 368 del deltas
367 369 # TODO Python 2.7 removal
368 370 # del efilesset
369 371 efilesset = None
370 372 self.callback = None
371 373
374 # Keep track of the (non-changelog) revlogs we've updated and their
375 # range of new revisions for sidedata rewrite.
376 # TODO do something more efficient than keeping the reference to
377 # the revlogs, especially memory-wise.
378 touched_manifests = {}
379 touched_filelogs = {}
380
372 381 # pull off the manifest group
373 382 repo.ui.status(_(b"adding manifests\n"))
374 383 # We know that we'll never have more manifests than we had
375 384 # changesets.
376 385 progress = repo.ui.makeprogress(
377 386 _(b'manifests'), unit=_(b'chunks'), total=changesets
378 387 )
379 self._unpackmanifests(repo, revmap, trp, progress)
388 on_manifest_rev = None
389 if sidedata_helpers and b'manifest' in sidedata_helpers[1]:
390
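            # The assert below relies on addgroup() delivering new revisions
            # in ascending, contiguous order, which lets each touched revlog
            # be summarized as a single (first_rev, last_rev) range.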
391 def on_manifest_rev(manifest, rev):
392 range = touched_manifests.get(manifest)
393 if not range:
394 touched_manifests[manifest] = (rev, rev)
395 else:
396 assert rev == range[1] + 1
397 touched_manifests[manifest] = (range[0], rev)
398
399 self._unpackmanifests(
400 repo,
401 revmap,
402 trp,
403 progress,
404 addrevisioncb=on_manifest_rev,
405 )
380 406
381 407 needfiles = {}
382 408 if repo.ui.configbool(b'server', b'validate'):
383 409 cl = repo.changelog
384 410 ml = repo.manifestlog
385 411 # validate incoming csets have their manifests
386 412 for cset in pycompat.xrange(clstart, clend):
387 413 mfnode = cl.changelogrevision(cset).manifest
388 414 mfest = ml[mfnode].readdelta()
389 415 # store file nodes we must see
390 416 for f, n in pycompat.iteritems(mfest):
391 417 needfiles.setdefault(f, set()).add(n)
392 418
419 on_filelog_rev = None
420 if sidedata_helpers and b'filelog' in sidedata_helpers[1]:
421
422 def on_filelog_rev(filelog, rev):
423 range = touched_filelogs.get(filelog)
424 if not range:
425 touched_filelogs[filelog] = (rev, rev)
426 else:
427 assert rev == range[1] + 1
428 touched_filelogs[filelog] = (range[0], rev)
429
393 430 # process the files
394 431 repo.ui.status(_(b"adding file changes\n"))
395 432 newrevs, newfiles = _addchangegroupfiles(
396 repo, self, revmap, trp, efiles, needfiles
433 repo,
434 self,
435 revmap,
436 trp,
437 efiles,
438 needfiles,
439 addrevisioncb=on_filelog_rev,
397 440 )
398 441
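        # If the remote's sidedata categories differ from the local ones,
        # rewrite the sidedata of every revision added above, using the
        # (startrev, endrev) range recorded for each touched revlog.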
442 if sidedata_helpers:
443 if b'changelog' in sidedata_helpers[1]:
444 cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
445 for mf, (startrev, endrev) in touched_manifests.items():
446 mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
447 for fl, (startrev, endrev) in touched_filelogs.items():
448 fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
449
399 450 # making sure the value exists
400 451 tr.changes.setdefault(b'changegroup-count-changesets', 0)
401 452 tr.changes.setdefault(b'changegroup-count-revisions', 0)
402 453 tr.changes.setdefault(b'changegroup-count-files', 0)
403 454 tr.changes.setdefault(b'changegroup-count-heads', 0)
404 455
405 456 # Some code uses bundle operations for internal purposes, usually
406 457 # setting `ui.quiet` to keep them out of the user's sight. Since the
407 458 # report of such operations now happens at the end of the transaction,
408 459 # ui.quiet has no direct effect on the output.
409 460 #
410 461 # To preserve this intent we use an inelegant hack: we fail to report
411 462 # the change if `quiet` is set. We should probably move to
412 463 # something better, but this is a good first step to allow the "end
413 464 # of transaction report" to pass tests.
414 465 if not repo.ui.quiet:
415 466 tr.changes[b'changegroup-count-changesets'] += changesets
416 467 tr.changes[b'changegroup-count-revisions'] += newrevs
417 468 tr.changes[b'changegroup-count-files'] += newfiles
418 469
419 470 deltaheads = 0
420 471 if oldheads:
421 472 heads = cl.heads()
422 473 deltaheads += len(heads) - len(oldheads)
423 474 for h in heads:
424 475 if h not in oldheads and repo[h].closesbranch():
425 476 deltaheads -= 1
426 477
427 478 # see previous comment about checking ui.quiet
428 479 if not repo.ui.quiet:
429 480 tr.changes[b'changegroup-count-heads'] += deltaheads
430 481 repo.invalidatevolatilesets()
431 482
432 483 if changesets > 0:
433 484 if b'node' not in tr.hookargs:
434 485 tr.hookargs[b'node'] = hex(cl.node(clstart))
435 486 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
436 487 hookargs = dict(tr.hookargs)
437 488 else:
438 489 hookargs = dict(tr.hookargs)
439 490 hookargs[b'node'] = hex(cl.node(clstart))
440 491 hookargs[b'node_last'] = hex(cl.node(clend - 1))
441 492 repo.hook(
442 493 b'pretxnchangegroup',
443 494 throw=True,
444 495 **pycompat.strkwargs(hookargs)
445 496 )
446 497
447 498 added = pycompat.xrange(clstart, clend)
448 499 phaseall = None
449 500 if srctype in (b'push', b'serve'):
450 501 # Old servers can not push the boundary themselves.
451 502 # New servers won't push the boundary if changeset already
452 503 # exists locally as secret
453 504 #
454 505 # We should not use `added` here but the list of all changes in
455 506 # the bundle
456 507 if repo.publishing():
457 508 targetphase = phaseall = phases.public
458 509 else:
459 510 # closer target phase computation
460 511
461 512 # Those changesets have been pushed from the
462 513 # outside, their phases are going to be pushed
463 514 # alongside. Therefore `targetphase` is
464 515 # ignored.
465 516 targetphase = phaseall = phases.draft
466 517 if added:
467 518 phases.registernew(repo, tr, targetphase, added)
468 519 if phaseall is not None:
469 520 if duprevs:
470 521 duprevs.extend(added)
471 522 else:
472 523 duprevs = added
473 524 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
474 525 duprevs = []
475 526
476 527 if changesets > 0:
477 528
478 529 def runhooks(unused_success):
479 530 # These hooks run when the lock releases, not when the
480 531 # transaction closes. So it's possible for the changelog
481 532 # to have changed since we last saw it.
482 533 if clstart >= len(repo):
483 534 return
484 535
485 536 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
486 537
487 538 for rev in added:
488 539 args = hookargs.copy()
489 540 args[b'node'] = hex(cl.node(rev))
490 541 del args[b'node_last']
491 542 repo.hook(b"incoming", **pycompat.strkwargs(args))
492 543
493 544 newheads = [h for h in repo.heads() if h not in oldheads]
494 545 repo.ui.log(
495 546 b"incoming",
496 547 b"%d incoming changes - new heads: %s\n",
497 548 len(added),
498 549 b', '.join([hex(c[:6]) for c in newheads]),
499 550 )
500 551
501 552 tr.addpostclose(
502 553 b'changegroup-runhooks-%020i' % clstart,
503 554 lambda tr: repo._afterlock(runhooks),
504 555 )
505 556 finally:
506 557 repo.ui.flush()
507 558 # never return 0 here:
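        # e.g. two added heads -> 3, one removed head -> -2, unchanged head
        # count -> 1, matching the contract documented in apply() above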
508 559 if deltaheads < 0:
509 560 ret = deltaheads - 1
510 561 else:
511 562 ret = deltaheads + 1
512 563 return ret
513 564
514 565 def deltaiter(self):
515 566 """
516 567 returns an iterator of the deltas in this changegroup
517 568
518 569 Useful for passing to the underlying storage system to be stored.
519 570 """
520 571 chain = None
521 572 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
522 573 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
523 574 yield chunkdata
524 575 chain = chunkdata[0]
525 576
526 577
527 578 class cg2unpacker(cg1unpacker):
528 579 """Unpacker for cg2 streams.
529 580
530 581 cg2 streams add support for generaldelta, so the delta header
531 582 format is slightly different. All other features about the data
532 583 remain the same.
533 584 """
534 585
535 586 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
536 587 deltaheadersize = deltaheader.size
537 588 version = b'02'
538 589
539 590 def _deltaheader(self, headertuple, prevnode):
540 591 node, p1, p2, deltabase, cs = headertuple
541 592 flags = 0
542 593 return node, p1, p2, deltabase, cs, flags
543 594
544 595
545 596 class cg3unpacker(cg2unpacker):
546 597 """Unpacker for cg3 streams.
547 598
548 599 cg3 streams add support for exchanging treemanifests and revlog
549 600 flags. It adds the revlog flags to the delta header and an empty chunk
550 601 separating manifests and files.
551 602 """
552 603
553 604 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
554 605 deltaheadersize = deltaheader.size
555 606 version = b'03'
556 607 _grouplistcount = 2 # One list of manifests and one list of files
557 608
558 609 def _deltaheader(self, headertuple, prevnode):
559 610 node, p1, p2, deltabase, cs, flags = headertuple
560 611 return node, p1, p2, deltabase, cs, flags
561 612
562 def _unpackmanifests(self, repo, revmap, trp, prog):
563 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
613 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
614 super(cg3unpacker, self)._unpackmanifests(
615 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
616 )
564 617 for chunkdata in iter(self.filelogheader, {}):
565 618 # If we get here, there are directory manifests in the changegroup
566 619 d = chunkdata[b"filename"]
567 620 repo.ui.debug(b"adding %s revisions\n" % d)
568 621 deltas = self.deltaiter()
569 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
622 if not repo.manifestlog.getstorage(d).addgroup(
623 deltas, revmap, trp, addrevisioncb=addrevisioncb
624 ):
570 625 raise error.Abort(_(b"received dir revlog group is empty"))
571 626
572 627
573 628 class cg4unpacker(cg3unpacker):
574 629 """Unpacker for cg4 streams.
575 630
576 631 cg4 streams add support for exchanging sidedata.
577 632 """
578 633
579 634 version = b'04'
580 635
581 636 def deltachunk(self, prevnode):
582 637 res = super(cg4unpacker, self).deltachunk(prevnode)
583 638 if not res:
584 639 return res
585 640
586 641 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
587 642
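        # In cg4, each delta is followed by one extra chunk holding its
        # serialized sidedata; an empty chunk means no sidedata for this
        # revision.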
588 643 sidedata_raw = getchunk(self._stream)
589 644 sidedata = {}
590 645 if len(sidedata_raw) > 0:
591 646 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
592 647
593 648 return node, p1, p2, cs, deltabase, delta, flags, sidedata
594 649
595 650
596 651 class headerlessfixup(object):
597 652 def __init__(self, fh, h):
598 653 self._h = h
599 654 self._fh = fh
600 655
601 656 def read(self, n):
602 657 if self._h:
603 658 d, self._h = self._h[:n], self._h[n:]
604 659 if len(d) < n:
605 660 d += readexactly(self._fh, n - len(d))
606 661 return d
607 662 return readexactly(self._fh, n)
608 663
609 664
610 665 def _revisiondeltatochunks(delta, headerfn):
611 666 """Serialize a revisiondelta to changegroup chunks."""
612 667
613 668 # The captured revision delta may be encoded as a delta against
614 669 # a base revision or as a full revision. The changegroup format
615 670 # requires that everything on the wire be deltas. So for full
616 671 # revisions, we need to invent a header that says to rewrite
617 672 # data.
618 673
619 674 if delta.delta is not None:
620 675 prefix, data = b'', delta.delta
621 676 elif delta.basenode == nullid:
622 677 data = delta.revision
623 678 prefix = mdiff.trivialdiffheader(len(data))
624 679 else:
625 680 data = delta.revision
626 681 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
627 682
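    # Both prefixes are a single bdiff hunk header (start, end, newlength):
    # trivialdiffheader(n) rewrites the empty range [0, 0) with n bytes,
    # while replacediffheader(old, n) replaces the whole base text [0, old).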
628 683 meta = headerfn(delta)
629 684
630 685 yield chunkheader(len(meta) + len(prefix) + len(data))
631 686 yield meta
632 687 if prefix:
633 688 yield prefix
634 689 yield data
635 690
636 691 sidedata = delta.sidedata
637 692 if sidedata is not None:
638 693 # Need a separate chunk for sidedata to be able to differentiate
639 694 # "raw delta" length and sidedata length
640 695 yield chunkheader(len(sidedata))
641 696 yield sidedata
642 697
643 698
644 699 def _sortnodesellipsis(store, nodes, cl, lookup):
645 700 """Sort nodes for changegroup generation."""
646 701 # Ellipses serving mode.
647 702 #
648 703 # In a perfect world, we'd generate better ellipsis-ified graphs
649 704 # for non-changelog revlogs. In practice, we haven't started doing
650 705 # that yet, so the resulting DAGs for the manifestlog and filelogs
651 706 # are actually full of bogus parentage on all the ellipsis
652 707 # nodes. This has the side effect that, while the contents are
653 708 # correct, the individual DAGs might be completely out of whack in
654 709 # a case like 882681bc3166 and its ancestors (back about 10
655 710 # revisions or so) in the main hg repo.
656 711 #
657 712 # The one invariant we *know* holds is that the new (potentially
658 713 # bogus) DAG shape will be valid if we order the nodes in the
659 714 # order that they're introduced in dramatis personae by the
660 715 # changelog, so what we do is we sort the non-changelog histories
661 716 # by the order in which they are used by the changelog.
662 717 key = lambda n: cl.rev(lookup(n))
663 718 return sorted(nodes, key=key)
664 719
665 720
666 721 def _resolvenarrowrevisioninfo(
667 722 cl,
668 723 store,
669 724 ischangelog,
670 725 rev,
671 726 linkrev,
672 727 linknode,
673 728 clrevtolocalrev,
674 729 fullclnodes,
675 730 precomputedellipsis,
676 731 ):
677 732 linkparents = precomputedellipsis[linkrev]
678 733
679 734 def local(clrev):
680 735 """Turn a changelog revnum into a local revnum.
681 736
682 737 The ellipsis dag is stored as revnums on the changelog,
683 738 but when we're producing ellipsis entries for
684 739 non-changelog revlogs, we need to turn those numbers into
685 740 something local. This does that for us, and during the
686 741 changelog sending phase will also expand the stored
687 742 mappings as needed.
688 743 """
689 744 if clrev == nullrev:
690 745 return nullrev
691 746
692 747 if ischangelog:
693 748 return clrev
694 749
695 750 # Walk the ellipsis-ized changelog breadth-first looking for a
696 751 # change that has been linked from the current revlog.
697 752 #
698 753 # For a flat manifest revlog only a single step should be necessary
699 754 # as all relevant changelog entries are relevant to the flat
700 755 # manifest.
701 756 #
702 757 # For a filelog or tree manifest dirlog however not every changelog
703 758 # entry will have been relevant, so we need to skip some changelog
704 759 # nodes even after ellipsis-izing.
705 760 walk = [clrev]
706 761 while walk:
707 762 p = walk[0]
708 763 walk = walk[1:]
709 764 if p in clrevtolocalrev:
710 765 return clrevtolocalrev[p]
711 766 elif p in fullclnodes:
712 767 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
713 768 elif p in precomputedellipsis:
714 769 walk.extend(
715 770 [pp for pp in precomputedellipsis[p] if pp != nullrev]
716 771 )
717 772 else:
718 773 # In this case, we've got an ellipsis with parents
719 774 # outside the current bundle (likely an
720 775 # incremental pull). We "know" that we can use the
721 776 # value of this same revlog at whatever revision
722 777 # is pointed to by linknode. "Know" is in scare
723 778 # quotes because I haven't done enough examination
724 779 # of edge cases to convince myself this is really
725 780 # a fact - it works for all the (admittedly
726 781 # thorough) cases in our testsuite, but I would be
727 782 # somewhat unsurprised to find a case in the wild
728 783 # where this breaks down a bit. That said, I don't
729 784 # know if it would hurt anything.
730 785 for i in pycompat.xrange(rev, 0, -1):
731 786 if store.linkrev(i) == clrev:
732 787 return i
733 788 # We failed to resolve a parent for this node, so
734 789 # we crash the changegroup construction.
735 790 raise error.Abort(
736 791 b"unable to resolve parent while packing '%s' %r"
737 792 b' for changeset %r' % (store.indexfile, rev, clrev)
738 793 )
739 794
740 795 return nullrev
741 796
742 797 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
743 798 p1, p2 = nullrev, nullrev
744 799 elif len(linkparents) == 1:
745 800 (p1,) = sorted(local(p) for p in linkparents)
746 801 p2 = nullrev
747 802 else:
748 803 p1, p2 = sorted(local(p) for p in linkparents)
749 804
750 805 p1node, p2node = store.node(p1), store.node(p2)
751 806
752 807 return p1node, p2node, linknode
753 808
754 809
755 810 def deltagroup(
756 811 repo,
757 812 store,
758 813 nodes,
759 814 ischangelog,
760 815 lookup,
761 816 forcedeltaparentprev,
762 817 topic=None,
763 818 ellipses=False,
764 819 clrevtolocalrev=None,
765 820 fullclnodes=None,
766 821 precomputedellipsis=None,
767 822 sidedata_helpers=None,
768 823 ):
769 824 """Calculate deltas for a set of revisions.
770 825
771 826 Is a generator of ``revisiondelta`` instances.
772 827
773 828 If topic is not None, progress detail will be generated using this
774 829 topic name (e.g. changesets, manifests, etc).
775 830
776 831 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
777 832 """
778 833 if not nodes:
779 834 return
780 835
781 836 cl = repo.changelog
782 837
783 838 if ischangelog:
784 839 # `hg log` shows changesets in storage order. To preserve order
785 840 # across clones, send out changesets in storage order.
786 841 nodesorder = b'storage'
787 842 elif ellipses:
788 843 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
789 844 nodesorder = b'nodes'
790 845 else:
791 846 nodesorder = None
792 847
793 848 # Perform ellipses filtering and revision massaging. We do this before
794 849 # emitrevisions() because a) filtering out revisions creates less work
795 850 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
796 851 # assumptions about delta choices and we would possibly send a delta
797 852 # referencing a missing base revision.
798 853 #
799 854 # Also, calling lookup() has side-effects with regards to populating
800 855 # data structures. If we don't call lookup() for each node or if we call
801 856 # lookup() after the first pass through each node, things can break -
802 857 # possibly intermittently depending on the python hash seed! For that
803 858 # reason, we store a mapping of all linknodes during the initial node
804 859 # pass rather than use lookup() on the output side.
805 860 if ellipses:
806 861 filtered = []
807 862 adjustedparents = {}
808 863 linknodes = {}
809 864
810 865 for node in nodes:
811 866 rev = store.rev(node)
812 867 linknode = lookup(node)
813 868 linkrev = cl.rev(linknode)
814 869 clrevtolocalrev[linkrev] = rev
815 870
816 871 # If linknode is in fullclnodes, it means the corresponding
817 872 # changeset was a full changeset and is being sent unaltered.
818 873 if linknode in fullclnodes:
819 874 linknodes[node] = linknode
820 875
821 876 # If the corresponding changeset wasn't in the set computed
822 877 # as relevant to us, it should be dropped outright.
823 878 elif linkrev not in precomputedellipsis:
824 879 continue
825 880
826 881 else:
827 882 # We could probably do this later and avoid the dict
828 883 # holding state. But it likely doesn't matter.
829 884 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
830 885 cl,
831 886 store,
832 887 ischangelog,
833 888 rev,
834 889 linkrev,
835 890 linknode,
836 891 clrevtolocalrev,
837 892 fullclnodes,
838 893 precomputedellipsis,
839 894 )
840 895
841 896 adjustedparents[node] = (p1node, p2node)
842 897 linknodes[node] = linknode
843 898
844 899 filtered.append(node)
845 900
846 901 nodes = filtered
847 902
848 903 # We expect the first pass to be fast, so we only engage the progress
849 904 # meter for constructing the revision deltas.
850 905 progress = None
851 906 if topic is not None:
852 907 progress = repo.ui.makeprogress(
853 908 topic, unit=_(b'chunks'), total=len(nodes)
854 909 )
855 910
856 911 configtarget = repo.ui.config(b'devel', b'bundle.delta')
857 912 if configtarget not in (b'', b'p1', b'full'):
858 913 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
859 914 repo.ui.warn(msg % configtarget)
860 915
861 916 deltamode = repository.CG_DELTAMODE_STD
862 917 if forcedeltaparentprev:
863 918 deltamode = repository.CG_DELTAMODE_PREV
864 919 elif configtarget == b'p1':
865 920 deltamode = repository.CG_DELTAMODE_P1
866 921 elif configtarget == b'full':
867 922 deltamode = repository.CG_DELTAMODE_FULL
868 923
869 924 revisions = store.emitrevisions(
870 925 nodes,
871 926 nodesorder=nodesorder,
872 927 revisiondata=True,
873 928 assumehaveparentrevisions=not ellipses,
874 929 deltamode=deltamode,
875 930 sidedata_helpers=sidedata_helpers,
876 931 )
877 932
878 933 for i, revision in enumerate(revisions):
879 934 if progress:
880 935 progress.update(i + 1)
881 936
882 937 if ellipses:
883 938 linknode = linknodes[revision.node]
884 939
885 940 if revision.node in adjustedparents:
886 941 p1node, p2node = adjustedparents[revision.node]
887 942 revision.p1node = p1node
888 943 revision.p2node = p2node
889 944 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
890 945
891 946 else:
892 947 linknode = lookup(revision.node)
893 948
894 949 revision.linknode = linknode
895 950 yield revision
896 951
897 952 if progress:
898 953 progress.complete()
899 954
900 955
901 956 class cgpacker(object):
902 957 def __init__(
903 958 self,
904 959 repo,
905 960 oldmatcher,
906 961 matcher,
907 962 version,
908 963 builddeltaheader,
909 964 manifestsend,
910 965 forcedeltaparentprev=False,
911 966 bundlecaps=None,
912 967 ellipses=False,
913 968 shallow=False,
914 969 ellipsisroots=None,
915 970 fullnodes=None,
916 971 remote_sidedata=None,
917 972 ):
918 973 """Given a source repo, construct a bundler.
919 974
920 975 oldmatcher is a matcher that matches on files the client already has.
921 976 These will not be included in the changegroup.
922 977
923 978 matcher is a matcher that matches on files to include in the
924 979 changegroup. Used to facilitate sparse changegroups.
925 980
926 981 forcedeltaparentprev indicates whether delta parents must be against
927 982 the previous revision in a delta group. This should only be used for
928 983 compatibility with changegroup version 1.
929 984
930 985 builddeltaheader is a callable that constructs the header for a group
931 986 delta.
932 987
933 988 manifestsend is a chunk to send after manifests have been fully emitted.
934 989
935 990 ellipses indicates whether ellipsis serving mode is enabled.
936 991
937 992 bundlecaps is optional and can be used to specify the set of
938 993 capabilities which can be used to build the bundle. While bundlecaps is
939 994 unused in core Mercurial, extensions rely on this feature to communicate
940 995 capabilities to customize the changegroup packer.
941 996
942 997 shallow indicates whether shallow data might be sent. The packer may
943 998 need to pack file contents not introduced by the changes being packed.
944 999
945 1000 fullnodes is the set of changelog nodes which should not be ellipsis
946 1001 nodes. We store this rather than the set of nodes that should be
947 1002 ellipsis because for very large histories we expect this to be
948 1003 significantly smaller.
949 1004
950 1005 remote_sidedata is the set of sidedata categories wanted by the remote.
951 1006 """
952 1007 assert oldmatcher
953 1008 assert matcher
954 1009 self._oldmatcher = oldmatcher
955 1010 self._matcher = matcher
956 1011
957 1012 self.version = version
958 1013 self._forcedeltaparentprev = forcedeltaparentprev
959 1014 self._builddeltaheader = builddeltaheader
960 1015 self._manifestsend = manifestsend
961 1016 self._ellipses = ellipses
962 1017
963 1018 # Set of capabilities we can use to build the bundle.
964 1019 if bundlecaps is None:
965 1020 bundlecaps = set()
966 1021 self._bundlecaps = bundlecaps
967 1022 if remote_sidedata is None:
968 1023 remote_sidedata = set()
969 1024 self._remote_sidedata = remote_sidedata
970 1025 self._isshallow = shallow
971 1026 self._fullclnodes = fullnodes
972 1027
973 1028 # Maps ellipsis revs to their roots at the changelog level.
974 1029 self._precomputedellipsis = ellipsisroots
975 1030
976 1031 self._repo = repo
977 1032
978 1033 if self._repo.ui.verbose and not self._repo.ui.debugflag:
979 1034 self._verbosenote = self._repo.ui.note
980 1035 else:
981 1036 self._verbosenote = lambda s: None
982 1037
983 1038 def generate(
984 1039 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
985 1040 ):
986 1041 """Yield a sequence of changegroup byte chunks.
987 1042 If changelog is False, changelog data won't be added to changegroup
988 1043 """
989 1044
990 1045 repo = self._repo
991 1046 cl = repo.changelog
992 1047
993 1048 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
994 1049 size = 0
995 1050
996 1051 sidedata_helpers = None
997 1052 if self.version == b'04':
998 1053 remote_sidedata = self._remote_sidedata
999 1054 if source == b'strip':
1000 1055 # We're our own remote when stripping, get the no-op helpers
1001 1056 # TODO a better approach would be for the strip bundle to
1002 1057 # correctly advertise its sidedata categories directly.
1003 1058 remote_sidedata = repo._wanted_sidedata
1004 1059 sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)
1005 1060
1006 1061 clstate, deltas = self._generatechangelog(
1007 1062 cl,
1008 1063 clnodes,
1009 1064 generate=changelog,
1010 1065 sidedata_helpers=sidedata_helpers,
1011 1066 )
1012 1067 for delta in deltas:
1013 1068 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
1014 1069 size += len(chunk)
1015 1070 yield chunk
1016 1071
1017 1072 close = closechunk()
1018 1073 size += len(close)
1019 1074 yield closechunk()
1020 1075
1021 1076 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1022 1077
1023 1078 clrevorder = clstate[b'clrevorder']
1024 1079 manifests = clstate[b'manifests']
1025 1080 changedfiles = clstate[b'changedfiles']
1026 1081
1027 1082 # We need to make sure that the linkrev in the changegroup refers to
1028 1083 # the first changeset that introduced the manifest or file revision.
1029 1084 # The fastpath is usually safer than the slowpath, because the filelogs
1030 1085 # are walked in revlog order.
1031 1086 #
1032 1087 # When taking the slowpath when the manifest revlog uses generaldelta,
1033 1088 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1034 1089 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1035 1090 #
1036 1091 # When taking the fastpath, we are only vulnerable to reordering
1037 1092 # of the changelog itself. The changelog never uses generaldelta and is
1038 1093 # never reordered. To handle this case, we simply take the slowpath,
1039 1094 # which already has the 'clrevorder' logic. This was also fixed in
1040 1095 # cc0ff93d0c0c.
1041 1096
1042 1097 # Treemanifests don't work correctly with fastpathlinkrev
1043 1098 # either, because we don't discover which directory nodes to
1044 1099 # send along with files. This could probably be fixed.
1045 1100 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1046 1101
1047 1102 fnodes = {} # needed file nodes
1048 1103
1049 1104 size = 0
1050 1105 it = self.generatemanifests(
1051 1106 commonrevs,
1052 1107 clrevorder,
1053 1108 fastpathlinkrev,
1054 1109 manifests,
1055 1110 fnodes,
1056 1111 source,
1057 1112 clstate[b'clrevtomanifestrev'],
1058 1113 sidedata_helpers=sidedata_helpers,
1059 1114 )
1060 1115
1061 1116 for tree, deltas in it:
1062 1117 if tree:
1063 1118 assert self.version in (b'03', b'04')
1064 1119 chunk = _fileheader(tree)
1065 1120 size += len(chunk)
1066 1121 yield chunk
1067 1122
1068 1123 for delta in deltas:
1069 1124 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1070 1125 for chunk in chunks:
1071 1126 size += len(chunk)
1072 1127 yield chunk
1073 1128
1074 1129 close = closechunk()
1075 1130 size += len(close)
1076 1131 yield close
1077 1132
1078 1133 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1079 1134 yield self._manifestsend
1080 1135
1081 1136 mfdicts = None
1082 1137 if self._ellipses and self._isshallow:
1083 1138 mfdicts = [
1084 1139 (repo.manifestlog[n].read(), lr)
1085 1140 for (n, lr) in pycompat.iteritems(manifests)
1086 1141 ]
1087 1142
1088 1143 manifests.clear()
1089 1144 clrevs = {cl.rev(x) for x in clnodes}
1090 1145
1091 1146 it = self.generatefiles(
1092 1147 changedfiles,
1093 1148 commonrevs,
1094 1149 source,
1095 1150 mfdicts,
1096 1151 fastpathlinkrev,
1097 1152 fnodes,
1098 1153 clrevs,
1099 1154 sidedata_helpers=sidedata_helpers,
1100 1155 )
1101 1156
1102 1157 for path, deltas in it:
1103 1158 h = _fileheader(path)
1104 1159 size = len(h)
1105 1160 yield h
1106 1161
1107 1162 for delta in deltas:
1108 1163 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1109 1164 for chunk in chunks:
1110 1165 size += len(chunk)
1111 1166 yield chunk
1112 1167
1113 1168 close = closechunk()
1114 1169 size += len(close)
1115 1170 yield close
1116 1171
1117 1172 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1118 1173
1119 1174 yield closechunk()
1120 1175
1121 1176 if clnodes:
1122 1177 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1123 1178
1124 1179 def _generatechangelog(
1125 1180 self, cl, nodes, generate=True, sidedata_helpers=None
1126 1181 ):
1127 1182 """Generate data for changelog chunks.
1128 1183
1129 1184 Returns a 2-tuple of a dict containing state and an iterable of
1130 1185 byte chunks. The state will not be fully populated until the
1131 1186 chunk stream has been fully consumed.
1132 1187
1133 1188 If generate is False, the state will be fully populated and no chunk
1134 1189 stream will be yielded.
1135 1190
1136 1191 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1137 1192 """
1138 1193 clrevorder = {}
1139 1194 manifests = {}
1140 1195 mfl = self._repo.manifestlog
1141 1196 changedfiles = set()
1142 1197 clrevtomanifestrev = {}
1143 1198
1144 1199 state = {
1145 1200 b'clrevorder': clrevorder,
1146 1201 b'manifests': manifests,
1147 1202 b'changedfiles': changedfiles,
1148 1203 b'clrevtomanifestrev': clrevtomanifestrev,
1149 1204 }
1150 1205
1151 1206 if not (generate or self._ellipses):
1152 1207 # sort the nodes in storage order
1153 1208 nodes = sorted(nodes, key=cl.rev)
1154 1209 for node in nodes:
1155 1210 c = cl.changelogrevision(node)
1156 1211 clrevorder[node] = len(clrevorder)
1157 1212 # record the first changeset introducing this manifest version
1158 1213 manifests.setdefault(c.manifest, node)
1159 1214 # Record a complete list of potentially-changed files in
1160 1215 # this manifest.
1161 1216 changedfiles.update(c.files)
1162 1217
1163 1218 return state, ()
1164 1219
1165 1220 # Callback for the changelog, used to collect changed files and
1166 1221 # manifest nodes.
1167 1222 # Returns the linkrev node (identity in the changelog case).
1168 1223 def lookupcl(x):
1169 1224 c = cl.changelogrevision(x)
1170 1225 clrevorder[x] = len(clrevorder)
1171 1226
1172 1227 if self._ellipses:
1173 1228 # Only update manifests if x is going to be sent. Otherwise we
1174 1229 # end up with bogus linkrevs specified for manifests and
1175 1230 # we skip some manifest nodes that we should otherwise
1176 1231 # have sent.
1177 1232 if (
1178 1233 x in self._fullclnodes
1179 1234 or cl.rev(x) in self._precomputedellipsis
1180 1235 ):
1181 1236
1182 1237 manifestnode = c.manifest
1183 1238 # Record the first changeset introducing this manifest
1184 1239 # version.
1185 1240 manifests.setdefault(manifestnode, x)
1186 1241 # Set this narrow-specific dict so we have the lowest
1187 1242 # manifest revnum to look up for this cl revnum. (Part of
1188 1243 # mapping changelog ellipsis parents to manifest ellipsis
1189 1244 # parents)
1190 1245 clrevtomanifestrev.setdefault(
1191 1246 cl.rev(x), mfl.rev(manifestnode)
1192 1247 )
1193 1248 # We can't trust the changed files list in the changeset if the
1194 1249 # client requested a shallow clone.
1195 1250 if self._isshallow:
1196 1251 changedfiles.update(mfl[c.manifest].read().keys())
1197 1252 else:
1198 1253 changedfiles.update(c.files)
1199 1254 else:
1200 1255 # record the first changeset introducing this manifest version
1201 1256 manifests.setdefault(c.manifest, x)
1202 1257 # Record a complete list of potentially-changed files in
1203 1258 # this manifest.
1204 1259 changedfiles.update(c.files)
1205 1260
1206 1261 return x
1207 1262
1208 1263 gen = deltagroup(
1209 1264 self._repo,
1210 1265 cl,
1211 1266 nodes,
1212 1267 True,
1213 1268 lookupcl,
1214 1269 self._forcedeltaparentprev,
1215 1270 ellipses=self._ellipses,
1216 1271 topic=_(b'changesets'),
1217 1272 clrevtolocalrev={},
1218 1273 fullclnodes=self._fullclnodes,
1219 1274 precomputedellipsis=self._precomputedellipsis,
1220 1275 sidedata_helpers=sidedata_helpers,
1221 1276 )
1222 1277
1223 1278 return state, gen
1224 1279
1225 1280 def generatemanifests(
1226 1281 self,
1227 1282 commonrevs,
1228 1283 clrevorder,
1229 1284 fastpathlinkrev,
1230 1285 manifests,
1231 1286 fnodes,
1232 1287 source,
1233 1288 clrevtolocalrev,
1234 1289 sidedata_helpers=None,
1235 1290 ):
1236 1291 """Returns an iterator of changegroup chunks containing manifests.
1237 1292
1238 1293 `source` is unused here, but is used by extensions like remotefilelog to
1239 1294 change what is sent based on pulls vs pushes, etc.
1240 1295
1241 1296 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1242 1297 """
1243 1298 repo = self._repo
1244 1299 mfl = repo.manifestlog
1245 1300 tmfnodes = {b'': manifests}
1246 1301
1247 1302 # Callback for the manifest, used to collect linkrevs for filelog
1248 1303 # revisions.
1249 1304 # Returns the linkrev node (collected in lookupcl).
1250 1305 def makelookupmflinknode(tree, nodes):
1251 1306 if fastpathlinkrev:
1252 1307 assert not tree
1253 1308 return (
1254 1309 manifests.__getitem__
1255 1310 ) # pytype: disable=unsupported-operands
1256 1311
1257 1312 def lookupmflinknode(x):
1258 1313 """Callback for looking up the linknode for manifests.
1259 1314
1260 1315 Returns the linkrev node for the specified manifest.
1261 1316
1262 1317 SIDE EFFECT:
1263 1318
1264 1319 1) fclnodes gets populated with the list of relevant
1265 1320 file nodes if we're not using fastpathlinkrev
1266 1321 2) When treemanifests are in use, collects treemanifest nodes
1267 1322 to send
1268 1323
1269 1324 Note that this means manifests must be completely sent to
1270 1325 the client before you can trust the list of files and
1271 1326 treemanifests to send.
1272 1327 """
1273 1328 clnode = nodes[x]
1274 1329 mdata = mfl.get(tree, x).readfast(shallow=True)
1275 1330 for p, n, fl in mdata.iterentries():
1276 1331 if fl == b't': # subdirectory manifest
1277 1332 subtree = tree + p + b'/'
1278 1333 tmfclnodes = tmfnodes.setdefault(subtree, {})
1279 1334 tmfclnode = tmfclnodes.setdefault(n, clnode)
1280 1335 if clrevorder[clnode] < clrevorder[tmfclnode]:
1281 1336 tmfclnodes[n] = clnode
1282 1337 else:
1283 1338 f = tree + p
1284 1339 fclnodes = fnodes.setdefault(f, {})
1285 1340 fclnode = fclnodes.setdefault(n, clnode)
1286 1341 if clrevorder[clnode] < clrevorder[fclnode]:
1287 1342 fclnodes[n] = clnode
1288 1343 return clnode
1289 1344
1290 1345 return lookupmflinknode
1291 1346
1292 1347 while tmfnodes:
1293 1348 tree, nodes = tmfnodes.popitem()
1294 1349
1295 1350 should_visit = self._matcher.visitdir(tree[:-1])
1296 1351 if tree and not should_visit:
1297 1352 continue
1298 1353
1299 1354 store = mfl.getstorage(tree)
1300 1355
1301 1356 if not should_visit:
1302 1357 # No nodes to send because this directory is out of
1303 1358 # the client's view of the repository (probably
1304 1359 # because of narrow clones). Do this even for the root
1305 1360 # directory (tree=='')
1306 1361 prunednodes = []
1307 1362 else:
1308 1363 # Avoid sending any manifest nodes we can prove the
1309 1364 # client already has by checking linkrevs. See the
1310 1365 # related comment in generatefiles().
1311 1366 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1312 1367
1313 1368 if tree and not prunednodes:
1314 1369 continue
1315 1370
1316 1371 lookupfn = makelookupmflinknode(tree, nodes)
1317 1372
1318 1373 deltas = deltagroup(
1319 1374 self._repo,
1320 1375 store,
1321 1376 prunednodes,
1322 1377 False,
1323 1378 lookupfn,
1324 1379 self._forcedeltaparentprev,
1325 1380 ellipses=self._ellipses,
1326 1381 topic=_(b'manifests'),
1327 1382 clrevtolocalrev=clrevtolocalrev,
1328 1383 fullclnodes=self._fullclnodes,
1329 1384 precomputedellipsis=self._precomputedellipsis,
1330 1385 sidedata_helpers=sidedata_helpers,
1331 1386 )
1332 1387
1333 1388 if not self._oldmatcher.visitdir(store.tree[:-1]):
1334 1389 yield tree, deltas
1335 1390 else:
1336 1391 # 'deltas' is a generator and we need to consume it even if
1337 1392 # we are not going to send it because a side-effect is that
1338 1393 # it updates tmfnodes (via lookupfn)
1339 1394 for d in deltas:
1340 1395 pass
1341 1396 if not tree:
1342 1397 yield tree, []
1343 1398
1344 1399 def _prunemanifests(self, store, nodes, commonrevs):
1345 1400 if not self._ellipses:
1346 1401 # In the non-ellipses case, especially for large repositories, it is
1347 1402 # better to avoid calling store.rev and store.linkrev on a lot of
1348 1403 # nodes, even if that means sending some extra data
1349 1404 return nodes.copy()
1350 1405 # This is split out as a separate method to allow filtering
1351 1406 # commonrevs in extension code.
1352 1407 #
1353 1408 # TODO(augie): this shouldn't be required, instead we should
1354 1409 # make filtering of revisions to send delegated to the store
1355 1410 # layer.
1356 1411 frev, flr = store.rev, store.linkrev
1357 1412 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1358 1413
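A minimal sketch of the pruning predicate above, using a toy node-to-linkrev
table in place of the real store API (all names here are illustrative):

    # Toy stand-in for the store.rev/store.linkrev composition: node -> linkrev.
    linkrev_of = {b'n0': 0, b'n1': 1, b'n2': 5}
    commonrevs = {0, 1}
    # Keep only nodes whose linkrev the client does not already have.
    pruned = [n for n in linkrev_of if linkrev_of[n] not in commonrevs]
    assert pruned == [b'n2']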
1359 1414 # The 'source' parameter is useful for extensions
1360 1415 def generatefiles(
1361 1416 self,
1362 1417 changedfiles,
1363 1418 commonrevs,
1364 1419 source,
1365 1420 mfdicts,
1366 1421 fastpathlinkrev,
1367 1422 fnodes,
1368 1423 clrevs,
1369 1424 sidedata_helpers=None,
1370 1425 ):
1371 1426 changedfiles = [
1372 1427 f
1373 1428 for f in changedfiles
1374 1429 if self._matcher(f) and not self._oldmatcher(f)
1375 1430 ]
1376 1431
1377 1432 if not fastpathlinkrev:
1378 1433
1379 1434 def normallinknodes(unused, fname):
1380 1435 return fnodes.get(fname, {})
1381 1436
1382 1437 else:
1383 1438 cln = self._repo.changelog.node
1384 1439
1385 1440 def normallinknodes(store, fname):
1386 1441 flinkrev = store.linkrev
1387 1442 fnode = store.node
1388 1443 revs = ((r, flinkrev(r)) for r in store)
1389 1444 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1390 1445
1391 1446 clrevtolocalrev = {}
1392 1447
1393 1448 if self._isshallow:
1394 1449 # In a shallow clone, the linknodes callback needs to also include
1395 1450 # those file nodes that are in the manifests we sent but weren't
1396 1451 # introduced by those manifests.
1397 1452 commonctxs = [self._repo[c] for c in commonrevs]
1398 1453 clrev = self._repo.changelog.rev
1399 1454
1400 1455 def linknodes(flog, fname):
1401 1456 for c in commonctxs:
1402 1457 try:
1403 1458 fnode = c.filenode(fname)
1404 1459 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1405 1460 except error.ManifestLookupError:
1406 1461 pass
1407 1462 links = normallinknodes(flog, fname)
1408 1463 if len(links) != len(mfdicts):
1409 1464 for mf, lr in mfdicts:
1410 1465 fnode = mf.get(fname, None)
1411 1466 if fnode in links:
1412 1467 links[fnode] = min(links[fnode], lr, key=clrev)
1413 1468 elif fnode:
1414 1469 links[fnode] = lr
1415 1470 return links
1416 1471
1417 1472 else:
1418 1473 linknodes = normallinknodes
1419 1474
1420 1475 repo = self._repo
1421 1476 progress = repo.ui.makeprogress(
1422 1477 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1423 1478 )
1424 1479 for i, fname in enumerate(sorted(changedfiles)):
1425 1480 filerevlog = repo.file(fname)
1426 1481 if not filerevlog:
1427 1482 raise error.Abort(
1428 1483 _(b"empty or missing file data for %s") % fname
1429 1484 )
1430 1485
1431 1486 clrevtolocalrev.clear()
1432 1487
1433 1488 linkrevnodes = linknodes(filerevlog, fname)
1434 1489 # Lookup for filenodes; we collected the linkrev nodes above in the
1435 1490 # fastpath case and with lookupmf in the slowpath case.
1436 1491 def lookupfilelog(x):
1437 1492 return linkrevnodes[x]
1438 1493
1439 1494 frev, flr = filerevlog.rev, filerevlog.linkrev
1440 1495 # Skip sending any filenode we know the client already
1441 1496 # has. This avoids over-sending files relatively
1442 1497 # inexpensively, so it's not a problem if we under-filter
1443 1498 # here.
1444 1499 filenodes = [
1445 1500 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1446 1501 ]
1447 1502
1448 1503 if not filenodes:
1449 1504 continue
1450 1505
1451 1506 progress.update(i + 1, item=fname)
1452 1507
1453 1508 deltas = deltagroup(
1454 1509 self._repo,
1455 1510 filerevlog,
1456 1511 filenodes,
1457 1512 False,
1458 1513 lookupfilelog,
1459 1514 self._forcedeltaparentprev,
1460 1515 ellipses=self._ellipses,
1461 1516 clrevtolocalrev=clrevtolocalrev,
1462 1517 fullclnodes=self._fullclnodes,
1463 1518 precomputedellipsis=self._precomputedellipsis,
1464 1519 sidedata_helpers=sidedata_helpers,
1465 1520 )
1466 1521
1467 1522 yield fname, deltas
1468 1523
1469 1524 progress.complete()
1470 1525
1471 1526
1472 1527 def _makecg1packer(
1473 1528 repo,
1474 1529 oldmatcher,
1475 1530 matcher,
1476 1531 bundlecaps,
1477 1532 ellipses=False,
1478 1533 shallow=False,
1479 1534 ellipsisroots=None,
1480 1535 fullnodes=None,
1481 1536 remote_sidedata=None,
1482 1537 ):
1483 1538 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1484 1539 d.node, d.p1node, d.p2node, d.linknode
1485 1540 )
1486 1541
1487 1542 return cgpacker(
1488 1543 repo,
1489 1544 oldmatcher,
1490 1545 matcher,
1491 1546 b'01',
1492 1547 builddeltaheader=builddeltaheader,
1493 1548 manifestsend=b'',
1494 1549 forcedeltaparentprev=True,
1495 1550 bundlecaps=bundlecaps,
1496 1551 ellipses=ellipses,
1497 1552 shallow=shallow,
1498 1553 ellipsisroots=ellipsisroots,
1499 1554 fullnodes=fullnodes,
1500 1555 )
1501 1556
1502 1557
1503 1558 def _makecg2packer(
1504 1559 repo,
1505 1560 oldmatcher,
1506 1561 matcher,
1507 1562 bundlecaps,
1508 1563 ellipses=False,
1509 1564 shallow=False,
1510 1565 ellipsisroots=None,
1511 1566 fullnodes=None,
1512 1567 remote_sidedata=None,
1513 1568 ):
1514 1569 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1515 1570 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1516 1571 )
1517 1572
1518 1573 return cgpacker(
1519 1574 repo,
1520 1575 oldmatcher,
1521 1576 matcher,
1522 1577 b'02',
1523 1578 builddeltaheader=builddeltaheader,
1524 1579 manifestsend=b'',
1525 1580 bundlecaps=bundlecaps,
1526 1581 ellipses=ellipses,
1527 1582 shallow=shallow,
1528 1583 ellipsisroots=ellipsisroots,
1529 1584 fullnodes=fullnodes,
1530 1585 )
1531 1586
1532 1587
1533 1588 def _makecg3packer(
1534 1589 repo,
1535 1590 oldmatcher,
1536 1591 matcher,
1537 1592 bundlecaps,
1538 1593 ellipses=False,
1539 1594 shallow=False,
1540 1595 ellipsisroots=None,
1541 1596 fullnodes=None,
1542 1597 remote_sidedata=None,
1543 1598 ):
1544 1599 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1545 1600 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1546 1601 )
1547 1602
1548 1603 return cgpacker(
1549 1604 repo,
1550 1605 oldmatcher,
1551 1606 matcher,
1552 1607 b'03',
1553 1608 builddeltaheader=builddeltaheader,
1554 1609 manifestsend=closechunk(),
1555 1610 bundlecaps=bundlecaps,
1556 1611 ellipses=ellipses,
1557 1612 shallow=shallow,
1558 1613 ellipsisroots=ellipsisroots,
1559 1614 fullnodes=fullnodes,
1560 1615 )
1561 1616
1562 1617
1563 1618 def _makecg4packer(
1564 1619 repo,
1565 1620 oldmatcher,
1566 1621 matcher,
1567 1622 bundlecaps,
1568 1623 ellipses=False,
1569 1624 shallow=False,
1570 1625 ellipsisroots=None,
1571 1626 fullnodes=None,
1572 1627 remote_sidedata=None,
1573 1628 ):
1574 1629 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1575 1630 # differentiate the raw delta from the sidedata.
1576 1631 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1577 1632 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1578 1633 )
1579 1634
1580 1635 return cgpacker(
1581 1636 repo,
1582 1637 oldmatcher,
1583 1638 matcher,
1584 1639 b'04',
1585 1640 builddeltaheader=builddeltaheader,
1586 1641 manifestsend=closechunk(),
1587 1642 bundlecaps=bundlecaps,
1588 1643 ellipses=ellipses,
1589 1644 shallow=shallow,
1590 1645 ellipsisroots=ellipsisroots,
1591 1646 fullnodes=fullnodes,
1592 1647 remote_sidedata=remote_sidedata,
1593 1648 )
1594 1649
1595 1650
1596 1651 _packermap = {
1597 1652 b'01': (_makecg1packer, cg1unpacker),
1598 1653 # cg2 adds support for exchanging generaldelta
1599 1654 b'02': (_makecg2packer, cg2unpacker),
1600 1655 # cg3 adds support for exchanging revlog flags and treemanifests
1601 1656 b'03': (_makecg3packer, cg3unpacker),
1602 1657 # cg4 adds support for exchanging sidedata
1603 1658 b'04': (_makecg4packer, cg4unpacker),
1604 1659 }
1605 1660
1606 1661
1607 1662 def allsupportedversions(repo):
1608 1663 versions = set(_packermap.keys())
1609 1664 needv03 = False
1610 1665 if (
1611 1666 repo.ui.configbool(b'experimental', b'changegroup3')
1612 1667 or repo.ui.configbool(b'experimental', b'treemanifest')
1613 1668 or scmutil.istreemanifest(repo)
1614 1669 ):
1615 1670 # we keep version 03 because we need it to exchange treemanifest data
1616 1671 #
1617 1672 # we also keep versions 01 and 02, because it is possible for a repo to
1618 1673 # contain both normal and tree manifests at the same time, so using an
1619 1674 # older version to pull data is viable
1620 1675 #
1621 1676 # (or even to push a subset of history)
1622 1677 needv03 = True
1623 1678 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1624 1679 if not has_revlogv2:
1625 1680 versions.discard(b'04')
1626 1681 if not needv03:
1627 1682 versions.discard(b'03')
1628 1683 return versions
1629 1684
1630 1685
1631 1686 # Changegroup versions that can be applied to the repo
1632 1687 def supportedincomingversions(repo):
1633 1688 return allsupportedversions(repo)
1634 1689
1635 1690
1636 1691 # Changegroup versions that can be created from the repo
1637 1692 def supportedoutgoingversions(repo):
1638 1693 versions = allsupportedversions(repo)
1639 1694 if scmutil.istreemanifest(repo):
1640 1695 # Versions 01 and 02 support only flat manifests and it's just too
1641 1696 # expensive to convert between the flat manifest and tree manifest on
1642 1697 # the fly. Since tree manifests are hashed differently, all of history
1643 1698 # would have to be converted. Instead, we simply don't even pretend to
1644 1699 # support versions 01 and 02.
1645 1700 versions.discard(b'01')
1646 1701 versions.discard(b'02')
1647 1702 if requirements.NARROW_REQUIREMENT in repo.requirements:
1648 1703 # Versions 01 and 02 don't support revlog flags, and we need to
1649 1704 # support that for stripping and unbundling to work.
1650 1705 versions.discard(b'01')
1651 1706 versions.discard(b'02')
1652 1707 if LFS_REQUIREMENT in repo.requirements:
1653 1708 # Versions 01 and 02 don't support revlog flags, and we need to
1654 1709 # mark LFS entries with REVIDX_EXTSTORED.
1655 1710 versions.discard(b'01')
1656 1711 versions.discard(b'02')
1657 1712
1658 1713 return versions
1659 1714
1660 1715
1661 1716 def localversion(repo):
1662 1717 # Finds the best version to use for bundles that are meant to be used
1663 1718 # locally, such as those from strip and shelve, and temporary bundles.
1664 1719 return max(supportedoutgoingversions(repo))
1665 1720
1666 1721
1667 1722 def safeversion(repo):
1668 1723 # Finds the smallest version that it's safe to assume clients of the repo
1669 1724 # will support. For example, all hg versions that support generaldelta also
1670 1725 # support changegroup 02.
1671 1726 versions = supportedoutgoingversions(repo)
1672 1727 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1673 1728 versions.discard(b'01')
1674 1729 assert versions
1675 1730 return min(versions)
1676 1731
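A quick illustration of the min/max selection performed by localversion() and
safeversion(), assuming a hypothetical repo whose supported outgoing versions
are {b'01', b'02', b'03'}:

    versions = {b'01', b'02', b'03'}
    assert max(versions) == b'03'  # localversion: richest format, local bundles
    assert min(versions) == b'01'  # safeversion: lowest common denominator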
1677 1732
1678 1733 def getbundler(
1679 1734 version,
1680 1735 repo,
1681 1736 bundlecaps=None,
1682 1737 oldmatcher=None,
1683 1738 matcher=None,
1684 1739 ellipses=False,
1685 1740 shallow=False,
1686 1741 ellipsisroots=None,
1687 1742 fullnodes=None,
1688 1743 remote_sidedata=None,
1689 1744 ):
1690 1745 assert version in supportedoutgoingversions(repo)
1691 1746
1692 1747 if matcher is None:
1693 1748 matcher = matchmod.always()
1694 1749 if oldmatcher is None:
1695 1750 oldmatcher = matchmod.never()
1696 1751
1697 1752 if version == b'01' and not matcher.always():
1698 1753 raise error.ProgrammingError(
1699 1754 b'version 01 changegroups do not support sparse file matchers'
1700 1755 )
1701 1756
1702 1757 if ellipses and version in (b'01', b'02'):
1703 1758 raise error.Abort(
1704 1759 _(
1705 1760 b'ellipsis nodes require at least cg3 on client and server, '
1706 1761 b'but negotiated version %s'
1707 1762 )
1708 1763 % version
1709 1764 )
1710 1765
1711 1766 # Requested files could include files not in the local store. So
1712 1767 # filter those out.
1713 1768 matcher = repo.narrowmatch(matcher)
1714 1769
1715 1770 fn = _packermap[version][0]
1716 1771 return fn(
1717 1772 repo,
1718 1773 oldmatcher,
1719 1774 matcher,
1720 1775 bundlecaps,
1721 1776 ellipses=ellipses,
1722 1777 shallow=shallow,
1723 1778 ellipsisroots=ellipsisroots,
1724 1779 fullnodes=fullnodes,
1725 1780 remote_sidedata=remote_sidedata,
1726 1781 )
1727 1782
1728 1783
1729 1784 def getunbundler(version, fh, alg, extras=None):
1730 1785 return _packermap[version][1](fh, alg, extras=extras)
1731 1786
1732 1787
1733 1788 def _changegroupinfo(repo, nodes, source):
1734 1789 if repo.ui.verbose or source == b'bundle':
1735 1790 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1736 1791 if repo.ui.debugflag:
1737 1792 repo.ui.debug(b"list of changesets:\n")
1738 1793 for node in nodes:
1739 1794 repo.ui.debug(b"%s\n" % hex(node))
1740 1795
1741 1796
1742 1797 def makechangegroup(
1743 1798 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1744 1799 ):
1745 1800 cgstream = makestream(
1746 1801 repo,
1747 1802 outgoing,
1748 1803 version,
1749 1804 source,
1750 1805 fastpath=fastpath,
1751 1806 bundlecaps=bundlecaps,
1752 1807 )
1753 1808 return getunbundler(
1754 1809 version,
1755 1810 util.chunkbuffer(cgstream),
1756 1811 None,
1757 1812 {b'clcount': len(outgoing.missing)},
1758 1813 )
1759 1814
1760 1815
1761 1816 def makestream(
1762 1817 repo,
1763 1818 outgoing,
1764 1819 version,
1765 1820 source,
1766 1821 fastpath=False,
1767 1822 bundlecaps=None,
1768 1823 matcher=None,
1769 1824 remote_sidedata=None,
1770 1825 ):
1771 1826 bundler = getbundler(
1772 1827 version,
1773 1828 repo,
1774 1829 bundlecaps=bundlecaps,
1775 1830 matcher=matcher,
1776 1831 remote_sidedata=remote_sidedata,
1777 1832 )
1778 1833
1779 1834 repo = repo.unfiltered()
1780 1835 commonrevs = outgoing.common
1781 1836 csets = outgoing.missing
1782 1837 heads = outgoing.ancestorsof
1783 1838 # We go through the fast path if we are told to, or if all (unfiltered)
1784 1839 # heads have been requested (since we then know all linkrevs will
1785 1840 # be pulled by the client).
1786 1841 heads.sort()
1787 1842 fastpathlinkrev = fastpath or (
1788 1843 repo.filtername is None and heads == sorted(repo.heads())
1789 1844 )
1790 1845
1791 1846 repo.hook(b'preoutgoing', throw=True, source=source)
1792 1847 _changegroupinfo(repo, csets, source)
1793 1848 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1794 1849
1795 1850
1796 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1851 def _addchangegroupfiles(
1852 repo,
1853 source,
1854 revmap,
1855 trp,
1856 expectedfiles,
1857 needfiles,
1858 addrevisioncb=None,
1859 ):
1797 1860 revisions = 0
1798 1861 files = 0
1799 1862 progress = repo.ui.makeprogress(
1800 1863 _(b'files'), unit=_(b'files'), total=expectedfiles
1801 1864 )
1802 1865 for chunkdata in iter(source.filelogheader, {}):
1803 1866 files += 1
1804 1867 f = chunkdata[b"filename"]
1805 1868 repo.ui.debug(b"adding %s revisions\n" % f)
1806 1869 progress.increment()
1807 1870 fl = repo.file(f)
1808 1871 o = len(fl)
1809 1872 try:
1810 1873 deltas = source.deltaiter()
1811 if not fl.addgroup(deltas, revmap, trp):
1874 added = fl.addgroup(
1875 deltas,
1876 revmap,
1877 trp,
1878 addrevisioncb=addrevisioncb,
1879 )
1880 if not added:
1812 1881 raise error.Abort(_(b"received file revlog group is empty"))
1813 1882 except error.CensoredBaseError as e:
1814 1883 raise error.Abort(_(b"received delta base is censored: %s") % e)
1815 1884 revisions += len(fl) - o
1816 1885 if f in needfiles:
1817 1886 needs = needfiles[f]
1818 1887 for new in pycompat.xrange(o, len(fl)):
1819 1888 n = fl.node(new)
1820 1889 if n in needs:
1821 1890 needs.remove(n)
1822 1891 else:
1823 1892 raise error.Abort(_(b"received spurious file revlog entry"))
1824 1893 if not needs:
1825 1894 del needfiles[f]
1826 1895 progress.complete()
1827 1896
1828 1897 for f, needs in pycompat.iteritems(needfiles):
1829 1898 fl = repo.file(f)
1830 1899 for n in needs:
1831 1900 try:
1832 1901 fl.rev(n)
1833 1902 except error.LookupError:
1834 1903 raise error.Abort(
1835 1904 _(b'missing file data for %s:%s - run hg verify')
1836 1905 % (f, hex(n))
1837 1906 )
1838 1907
1839 1908 return revisions, files
1840 1909
1841 1910
1842 1911 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
1843 1912 # Computers for computing sidedata on-the-fly
1844 1913 sd_computers = collections.defaultdict(list)
1845 1914 # Computers for categories to remove from sidedata
1846 1915 sd_removers = collections.defaultdict(list)
1847 1916
1848 1917 to_generate = remote_sd_categories - repo._wanted_sidedata
1849 1918 to_remove = repo._wanted_sidedata - remote_sd_categories
1850 1919 if pull:
1851 1920 to_generate, to_remove = to_remove, to_generate
1852 1921
1853 1922 for revlog_kind, computers in repo._sidedata_computers.items():
1854 1923 for category, computer in computers.items():
1855 1924 if category in to_generate:
1856 1925 sd_computers[revlog_kind].append(computer)
1857 1926 if category in to_remove:
1858 1927 sd_removers[revlog_kind].append(computer)
1859 1928
1860 1929 sidedata_helpers = (repo, sd_computers, sd_removers)
1861 1930 return sidedata_helpers
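As a rough usage sketch of get_sidedata_helpers (the stub repo below is
hypothetical and exposes only the two attributes the helper reads; the
computer signature shown is an assumption, not the real API): pushing to a
remote that wants category b'C' while the local repo only tracks b'A' yields
one computer for b'C' and one remover for b'A'.

    class FakeRepo(object):
        # Hypothetical stand-in; not the real localrepo interface.
        _wanted_sidedata = {b'A'}
        _sidedata_computers = {
            b'changelog': {
                b'A': lambda repo, revlog, rev, sidedata: sidedata,
                b'C': lambda repo, revlog, rev, sidedata: sidedata,
            }
        }

    repo, computers, removers = get_sidedata_helpers(FakeRepo(), {b'C'})
    assert len(computers[b'changelog']) == 1  # compute b'C' on the fly
    assert len(removers[b'changelog']) == 1   # strip b'A' before sending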
@@ -1,3207 +1,3258 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86 from .pure import parsers as pureparsers
87 87
88 88 # blanket usage of all the names to prevent pyflakes complaints
89 89 # We need these names available in the module for extensions.
90 90 REVLOGV0
91 91 REVLOGV1
92 92 REVLOGV2
93 93 FLAG_INLINE_DATA
94 94 FLAG_GENERALDELTA
95 95 REVLOG_DEFAULT_FLAGS
96 96 REVLOG_DEFAULT_FORMAT
97 97 REVLOG_DEFAULT_VERSION
98 98 REVLOGV1_FLAGS
99 99 REVLOGV2_FLAGS
100 100 REVIDX_ISCENSORED
101 101 REVIDX_ELLIPSIS
102 102 REVIDX_SIDEDATA
103 103 REVIDX_HASCOPIESINFO
104 104 REVIDX_EXTSTORED
105 105 REVIDX_DEFAULT_FLAGS
106 106 REVIDX_FLAGS_ORDER
107 107 REVIDX_RAWTEXT_CHANGING_FLAGS
108 108
109 109 parsers = policy.importmod('parsers')
110 110 rustancestor = policy.importrust('ancestor')
111 111 rustdagop = policy.importrust('dagop')
112 112 rustrevlog = policy.importrust('revlog')
113 113
114 114 # Aliased for performance.
115 115 _zlibdecompress = zlib.decompress
116 116
117 117 # max size of revlog with inline data
118 118 _maxinline = 131072
119 119 _chunksize = 1048576
120 120
121 121 # Flag processors for REVIDX_ELLIPSIS.
122 122 def ellipsisreadprocessor(rl, text):
123 123 return text, False
124 124
125 125
126 126 def ellipsiswriteprocessor(rl, text):
127 127 return text, False
128 128
129 129
130 130 def ellipsisrawprocessor(rl, text):
131 131 return False
132 132
133 133
134 134 ellipsisprocessor = (
135 135 ellipsisreadprocessor,
136 136 ellipsiswriteprocessor,
137 137 ellipsisrawprocessor,
138 138 )
139 139
140 140
141 141 def getoffset(q):
142 142 return int(q >> 16)
143 143
144 144
145 145 def gettype(q):
146 146 return int(q & 0xFFFF)
147 147
148 148
149 149 def offset_type(offset, type):
150 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 151 raise ValueError(b'unknown revlog index flags')
152 152 return int(int(offset) << 16 | type)
153 153
154 154
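The packing above keeps the byte offset in the high bits and the 16-bit flag
word in the low bits; a small round-trip check using only the three helpers
defined here:

    packed = offset_type(4096, 0)      # offset 4096, no flags set
    assert getoffset(packed) == 4096
    assert gettype(packed) == 0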
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @attr.s(slots=True, frozen=True)
176 176 class _revisioninfo(object):
177 177 """Information about a revision that allows building its fulltext
178 178 node: expected hash of the revision
179 179 p1, p2: parent revs of the revision
180 180 btext: built text cache consisting of a one-element list
181 181 cachedelta: (baserev, uncompressed_delta) or None
182 182 flags: flags associated to the revision storage
183 183
184 184 One of btext[0] or cachedelta must be set.
185 185 """
186 186
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195
196 196 @interfaceutil.implementer(repository.irevisiondelta)
197 197 @attr.s(slots=True)
198 198 class revlogrevisiondelta(object):
199 199 node = attr.ib()
200 200 p1node = attr.ib()
201 201 p2node = attr.ib()
202 202 basenode = attr.ib()
203 203 flags = attr.ib()
204 204 baserevisionsize = attr.ib()
205 205 revision = attr.ib()
206 206 delta = attr.ib()
207 207 sidedata = attr.ib()
208 208 linknode = attr.ib(default=None)
209 209
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218
219 219 # index v0:
220 220 # 4 bytes: offset
221 221 # 4 bytes: compressed length
222 222 # 4 bytes: base rev
223 223 # 4 bytes: link rev
224 224 # 20 bytes: parent 1 nodeid
225 225 # 20 bytes: parent 2 nodeid
226 226 # 20 bytes: nodeid
227 227 indexformatv0 = struct.Struct(b">4l20s20s20s")
228 228 indexformatv0_pack = indexformatv0.pack
229 229 indexformatv0_unpack = indexformatv0.unpack
230 230
231 231
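To make the v0 record layout concrete, here is a sketch that packs and
unpacks one synthetic 76-byte entry (4 * 4-byte ints + 3 * 20-byte hashes)
with the struct defined above; the values are arbitrary:

    record = indexformatv0_pack(
        0,             # offset
        11,            # compressed length
        0,             # base rev
        0,             # link rev
        b'\x00' * 20,  # parent 1 nodeid
        b'\x00' * 20,  # parent 2 nodeid
        b'\x12' * 20,  # nodeid
    )
    assert len(record) == indexformatv0.size
    offset, clen, base, link, p1, p2, node = indexformatv0_unpack(record)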
232 232 class revlogoldindex(list):
233 233 @property
234 234 def nodemap(self):
235 235 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
236 236 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
237 237 return self._nodemap
238 238
239 239 @util.propertycache
240 240 def _nodemap(self):
241 241 nodemap = nodemaputil.NodeMap({nullid: nullrev})
242 242 for r in range(0, len(self)):
243 243 n = self[r][7]
244 244 nodemap[n] = r
245 245 return nodemap
246 246
247 247 def has_node(self, node):
248 248 """return True if the node exist in the index"""
249 249 return node in self._nodemap
250 250
251 251 def rev(self, node):
252 252 """return a revision for a node
253 253
254 254 If the node is unknown, raise a RevlogError"""
255 255 return self._nodemap[node]
256 256
257 257 def get_rev(self, node):
258 258 """return a revision for a node
259 259
260 260 If the node is unknown, return None"""
261 261 return self._nodemap.get(node)
262 262
263 263 def append(self, tup):
264 264 self._nodemap[tup[7]] = len(self)
265 265 super(revlogoldindex, self).append(tup)
266 266
267 267 def __delitem__(self, i):
268 268 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
269 269 raise ValueError(b"deleting slices only supports a:-1 with step 1")
270 270 for r in pycompat.xrange(i.start, len(self)):
271 271 del self._nodemap[self[r][7]]
272 272 super(revlogoldindex, self).__delitem__(i)
273 273
274 274 def clearcaches(self):
275 275 self.__dict__.pop('_nodemap', None)
276 276
277 277 def __getitem__(self, i):
278 278 if i == -1:
279 279 return (0, 0, 0, -1, -1, -1, -1, nullid)
280 280 return list.__getitem__(self, i)
281 281
282 282
283 283 class revlogoldio(object):
284 284 def __init__(self):
285 285 self.size = indexformatv0.size
286 286
287 287 def parseindex(self, data, inline):
288 288 s = self.size
289 289 index = []
290 290 nodemap = nodemaputil.NodeMap({nullid: nullrev})
291 291 n = off = 0
292 292 l = len(data)
293 293 while off + s <= l:
294 294 cur = data[off : off + s]
295 295 off += s
296 296 e = indexformatv0_unpack(cur)
297 297 # transform to revlogv1 format
298 298 e2 = (
299 299 offset_type(e[0], 0),
300 300 e[1],
301 301 -1,
302 302 e[2],
303 303 e[3],
304 304 nodemap.get(e[4], nullrev),
305 305 nodemap.get(e[5], nullrev),
306 306 e[6],
307 307 )
308 308 index.append(e2)
309 309 nodemap[e[6]] = n
310 310 n += 1
311 311
312 312 index = revlogoldindex(index)
313 313 return index, None
314 314
315 315 def packentry(self, entry, node, version, rev):
316 316 if gettype(entry[0]):
317 317 raise error.RevlogError(
318 318 _(b'index entry flags need revlog version 1')
319 319 )
320 320 e2 = (
321 321 getoffset(entry[0]),
322 322 entry[1],
323 323 entry[3],
324 324 entry[4],
325 325 node(entry[5]),
326 326 node(entry[6]),
327 327 entry[7],
328 328 )
329 329 return indexformatv0_pack(*e2)
330 330
331 331
332 332 # index ng:
333 333 # 6 bytes: offset
334 334 # 2 bytes: flags
335 335 # 4 bytes: compressed length
336 336 # 4 bytes: uncompressed length
337 337 # 4 bytes: base rev
338 338 # 4 bytes: link rev
339 339 # 4 bytes: parent 1 rev
340 340 # 4 bytes: parent 2 rev
341 341 # 32 bytes: nodeid
342 342 indexformatng = struct.Struct(b">Qiiiiii20s12x")
343 343 indexformatng_pack = indexformatng.pack
344 344 versionformat = struct.Struct(b">I")
345 345 versionformat_pack = versionformat.pack
346 346 versionformat_unpack = versionformat.unpack
347 347
348 348 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
349 349 # signed integer)
350 350 _maxentrysize = 0x7FFFFFFF
351 351
352 352
353 353 class revlogio(object):
354 354 def __init__(self):
355 355 self.size = indexformatng.size
356 356
357 357 def parseindex(self, data, inline):
358 358 # call the C implementation to parse the index data
359 359 index, cache = parsers.parse_index2(data, inline)
360 360 return index, cache
361 361
362 362 def packentry(self, entry, node, version, rev):
363 363 p = indexformatng_pack(*entry)
364 364 if rev == 0:
365 365 p = versionformat_pack(version) + p[4:]
366 366 return p
367 367
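A sketch of the rev == 0 special case above: the first four bytes of the
very first index entry are overwritten with the version word, so the record
size is unchanged (the entry values below are synthetic):

    entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, b'\x12' * 20)
    p = indexformatng_pack(*entry)
    p = versionformat_pack(REVLOG_DEFAULT_VERSION) + p[4:]
    assert len(p) == indexformatng.size  # still one fixed-size record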
368 368
369 369 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
370 370 indexformatv2_pack = indexformatv2.pack
371 371
372 372
373 373 class revlogv2io(object):
374 374 def __init__(self):
375 375 self.size = indexformatv2.size
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
379 379 return index, cache
380 380
381 381 def packentry(self, entry, node, version, rev):
382 382 p = indexformatv2_pack(*entry)
383 383 if rev == 0:
384 384 p = versionformat_pack(version) + p[4:]
385 385 return p
386 386
387 387
388 388 NodemapRevlogIO = None
389 389
390 390 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
391 391
392 392 class NodemapRevlogIO(revlogio):
393 393 """A debug oriented IO class that return a PersistentNodeMapIndexObject
394 394
395 395 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
396 396 """
397 397
398 398 def parseindex(self, data, inline):
399 399 index, cache = parsers.parse_index_devel_nodemap(data, inline)
400 400 return index, cache
401 401
402 402
403 403 class rustrevlogio(revlogio):
404 404 def parseindex(self, data, inline):
405 405 index, cache = super(rustrevlogio, self).parseindex(data, inline)
406 406 return rustrevlog.MixedIndex(index), cache
407 407
408 408
409 409 class revlog(object):
410 410 """
411 411 the underlying revision storage object
412 412
413 413 A revlog consists of two parts, an index and the revision data.
414 414
415 415 The index is a file with a fixed record size containing
416 416 information on each revision, including its nodeid (hash), the
417 417 nodeids of its parents, the position and offset of its data within
418 418 the data file, and the revision it's based on. Finally, each entry
419 419 contains a linkrev entry that can serve as a pointer to external
420 420 data.
421 421
422 422 The revision data itself is a linear collection of data chunks.
423 423 Each chunk represents a revision and is usually represented as a
424 424 delta against the previous chunk. To bound lookup time, runs of
425 425 deltas are limited to about 2 times the length of the original
426 426 version data. This makes retrieval of a version proportional to
427 427 its size, or O(1) relative to the number of revisions.
428 428
429 429 Both pieces of the revlog are written to in an append-only
430 430 fashion, which means we never need to rewrite a file to insert or
431 431 remove data, and can use some simple techniques to avoid the need
432 432 for locking while reading.
433 433
434 434 If checkambig, indexfile is opened with checkambig=True at
435 435 writing, to avoid file stat ambiguity.
436 436
437 437 If mmaplargeindex is True, and an mmapindexthreshold is set, the
438 438 index will be mmapped rather than read if it is larger than the
439 439 configured threshold.
440 440
441 441 If censorable is True, the revlog can have censored revisions.
442 442
443 443 If `upperboundcomp` is not None, this is the expected maximal gain from
444 444 compression for the data content.
445 445
446 446 `concurrencychecker` is an optional function that receives 3 arguments: a
447 447 file handle, a filename, and an expected position. It should check whether
448 448 the current position in the file handle is valid, and log/warn/fail (by
449 449 raising).
450 450 """
451 451
452 452 _flagserrorclass = error.RevlogError
453 453
454 454 def __init__(
455 455 self,
456 456 opener,
457 457 indexfile,
458 458 datafile=None,
459 459 checkambig=False,
460 460 mmaplargeindex=False,
461 461 censorable=False,
462 462 upperboundcomp=None,
463 463 persistentnodemap=False,
464 464 concurrencychecker=None,
465 465 ):
466 466 """
467 467 create a revlog object
468 468
469 469 opener is a function that abstracts the file opening operation
470 470 and can be used to implement COW semantics or the like.
471 471
472 472 """
473 473 self.upperboundcomp = upperboundcomp
474 474 self.indexfile = indexfile
475 475 self.datafile = datafile or (indexfile[:-2] + b".d")
476 476 self.nodemap_file = None
477 477 if persistentnodemap:
478 478 self.nodemap_file = nodemaputil.get_nodemap_file(
479 479 opener, self.indexfile
480 480 )
481 481
482 482 self.opener = opener
483 483 # When True, indexfile is opened with checkambig=True at writing, to
484 484 # avoid file stat ambiguity.
485 485 self._checkambig = checkambig
486 486 self._mmaplargeindex = mmaplargeindex
487 487 self._censorable = censorable
488 488 # 3-tuple of (node, rev, text) for a raw revision.
489 489 self._revisioncache = None
490 490 # Maps rev to chain base rev.
491 491 self._chainbasecache = util.lrucachedict(100)
492 492 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
493 493 self._chunkcache = (0, b'')
494 494 # How much data to read and cache into the raw revlog data cache.
495 495 self._chunkcachesize = 65536
496 496 self._maxchainlen = None
497 497 self._deltabothparents = True
498 498 self.index = None
499 499 self._nodemap_docket = None
500 500 # Mapping of partial identifiers to full nodes.
501 501 self._pcache = {}
502 502 # Mapping of revision integer to full node.
503 503 self._compengine = b'zlib'
504 504 self._compengineopts = {}
505 505 self._maxdeltachainspan = -1
506 506 self._withsparseread = False
507 507 self._sparserevlog = False
508 508 self._srdensitythreshold = 0.50
509 509 self._srmingapsize = 262144
510 510
511 511 # Make copy of flag processors so each revlog instance can support
512 512 # custom flags.
513 513 self._flagprocessors = dict(flagutil.flagprocessors)
514 514
515 515 # 2-tuple of file handles being used for active writing.
516 516 self._writinghandles = None
517 517
518 518 self._loadindex()
519 519
520 520 self._concurrencychecker = concurrencychecker
521 521
522 522 def _loadindex(self):
523 523 mmapindexthreshold = None
524 524 opts = self.opener.options
525 525
526 526 if b'revlogv2' in opts:
527 527 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
528 528 elif b'revlogv1' in opts:
529 529 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
530 530 if b'generaldelta' in opts:
531 531 newversionflags |= FLAG_GENERALDELTA
532 532 elif b'revlogv0' in self.opener.options:
533 533 newversionflags = REVLOGV0
534 534 else:
535 535 newversionflags = REVLOG_DEFAULT_VERSION
536 536
537 537 if b'chunkcachesize' in opts:
538 538 self._chunkcachesize = opts[b'chunkcachesize']
539 539 if b'maxchainlen' in opts:
540 540 self._maxchainlen = opts[b'maxchainlen']
541 541 if b'deltabothparents' in opts:
542 542 self._deltabothparents = opts[b'deltabothparents']
543 543 self._lazydelta = bool(opts.get(b'lazydelta', True))
544 544 self._lazydeltabase = False
545 545 if self._lazydelta:
546 546 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
547 547 if b'compengine' in opts:
548 548 self._compengine = opts[b'compengine']
549 549 if b'zlib.level' in opts:
550 550 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
551 551 if b'zstd.level' in opts:
552 552 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
553 553 if b'maxdeltachainspan' in opts:
554 554 self._maxdeltachainspan = opts[b'maxdeltachainspan']
555 555 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
556 556 mmapindexthreshold = opts[b'mmapindexthreshold']
557 557 self.hassidedata = bool(opts.get(b'side-data', False))
558 558 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
559 559 withsparseread = bool(opts.get(b'with-sparse-read', False))
560 560 # sparse-revlog forces sparse-read
561 561 self._withsparseread = self._sparserevlog or withsparseread
562 562 if b'sparse-read-density-threshold' in opts:
563 563 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
564 564 if b'sparse-read-min-gap-size' in opts:
565 565 self._srmingapsize = opts[b'sparse-read-min-gap-size']
566 566 if opts.get(b'enableellipsis'):
567 567 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
568 568
569 569 # revlog v0 doesn't have flag processors
570 570 for flag, processor in pycompat.iteritems(
571 571 opts.get(b'flagprocessors', {})
572 572 ):
573 573 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
574 574
575 575 if self._chunkcachesize <= 0:
576 576 raise error.RevlogError(
577 577 _(b'revlog chunk cache size %r is not greater than 0')
578 578 % self._chunkcachesize
579 579 )
580 580 elif self._chunkcachesize & (self._chunkcachesize - 1):
581 581 raise error.RevlogError(
582 582 _(b'revlog chunk cache size %r is not a power of 2')
583 583 % self._chunkcachesize
584 584 )
585 585
586 586 indexdata = b''
587 587 self._initempty = True
588 588 try:
589 589 with self._indexfp() as f:
590 590 if (
591 591 mmapindexthreshold is not None
592 592 and self.opener.fstat(f).st_size >= mmapindexthreshold
593 593 ):
594 594 # TODO: should .close() to release resources without
595 595 # relying on Python GC
596 596 indexdata = util.buffer(util.mmapread(f))
597 597 else:
598 598 indexdata = f.read()
599 599 if len(indexdata) > 0:
600 600 versionflags = versionformat_unpack(indexdata[:4])[0]
601 601 self._initempty = False
602 602 else:
603 603 versionflags = newversionflags
604 604 except IOError as inst:
605 605 if inst.errno != errno.ENOENT:
606 606 raise
607 607
608 608 versionflags = newversionflags
609 609
610 610 self.version = versionflags
611 611
612 612 flags = versionflags & ~0xFFFF
613 613 fmt = versionflags & 0xFFFF
614 614
615 615 if fmt == REVLOGV0:
616 616 if flags:
617 617 raise error.RevlogError(
618 618 _(b'unknown flags (%#04x) in version %d revlog %s')
619 619 % (flags >> 16, fmt, self.indexfile)
620 620 )
621 621
622 622 self._inline = False
623 623 self._generaldelta = False
624 624
625 625 elif fmt == REVLOGV1:
626 626 if flags & ~REVLOGV1_FLAGS:
627 627 raise error.RevlogError(
628 628 _(b'unknown flags (%#04x) in version %d revlog %s')
629 629 % (flags >> 16, fmt, self.indexfile)
630 630 )
631 631
632 632 self._inline = versionflags & FLAG_INLINE_DATA
633 633 self._generaldelta = versionflags & FLAG_GENERALDELTA
634 634
635 635 elif fmt == REVLOGV2:
636 636 if flags & ~REVLOGV2_FLAGS:
637 637 raise error.RevlogError(
638 638 _(b'unknown flags (%#04x) in version %d revlog %s')
639 639 % (flags >> 16, fmt, self.indexfile)
640 640 )
641 641
642 642 # There is a bug in the transaction handling when going from an
643 643 # inline revlog to a separate index and data file. Turn it off until
644 644 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
645 645 # See issue6485
646 646 self._inline = False
647 647 # generaldelta implied by version 2 revlogs.
648 648 self._generaldelta = True
649 649
650 650 else:
651 651 raise error.RevlogError(
652 652 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
653 653 )
654 654 # sparse-revlog can't be on without general-delta (issue6056)
655 655 if not self._generaldelta:
656 656 self._sparserevlog = False
657 657
658 658 self._storedeltachains = True
659 659
660 660 devel_nodemap = (
661 661 self.nodemap_file
662 662 and opts.get(b'devel-force-nodemap', False)
663 663 and NodemapRevlogIO is not None
664 664 )
665 665
666 666 use_rust_index = False
667 667 if rustrevlog is not None:
668 668 if self.nodemap_file is not None:
669 669 use_rust_index = True
670 670 else:
671 671 use_rust_index = self.opener.options.get(b'rust.index')
672 672
673 673 self._io = revlogio()
674 674 if self.version == REVLOGV0:
675 675 self._io = revlogoldio()
676 676 elif fmt == REVLOGV2:
677 677 self._io = revlogv2io()
678 678 elif devel_nodemap:
679 679 self._io = NodemapRevlogIO()
680 680 elif use_rust_index:
681 681 self._io = rustrevlogio()
682 682 try:
683 683 d = self._io.parseindex(indexdata, self._inline)
684 684 index, _chunkcache = d
685 685 use_nodemap = (
686 686 not self._inline
687 687 and self.nodemap_file is not None
688 688 and util.safehasattr(index, 'update_nodemap_data')
689 689 )
690 690 if use_nodemap:
691 691 nodemap_data = nodemaputil.persisted_data(self)
692 692 if nodemap_data is not None:
693 693 docket = nodemap_data[0]
694 694 if (
695 695 len(d[0]) > docket.tip_rev
696 696 and d[0][docket.tip_rev][7] == docket.tip_node
697 697 ):
698 698 # no changelog tampering
699 699 self._nodemap_docket = docket
700 700 index.update_nodemap_data(*nodemap_data)
701 701 except (ValueError, IndexError):
702 702 raise error.RevlogError(
703 703 _(b"index %s is corrupted") % self.indexfile
704 704 )
705 705 self.index, self._chunkcache = d
706 706 if not self._chunkcache:
707 707 self._chunkclear()
708 708 # revnum -> (chain-length, sum-delta-length)
709 709 self._chaininfocache = util.lrucachedict(500)
710 710 # revlog header -> revlog compressor
711 711 self._decompressors = {}
712 712
713 713 @util.propertycache
714 714 def _compressor(self):
715 715 engine = util.compengines[self._compengine]
716 716 return engine.revlogcompressor(self._compengineopts)
717 717
718 718 def _indexfp(self, mode=b'r'):
719 719 """file object for the revlog's index file"""
720 720 args = {'mode': mode}
721 721 if mode != b'r':
722 722 args['checkambig'] = self._checkambig
723 723 if mode == b'w':
724 724 args['atomictemp'] = True
725 725 return self.opener(self.indexfile, **args)
726 726
727 727 def _datafp(self, mode=b'r'):
728 728 """file object for the revlog's data file"""
729 729 return self.opener(self.datafile, mode=mode)
730 730
731 731 @contextlib.contextmanager
732 732 def _datareadfp(self, existingfp=None):
733 733 """file object suitable to read data"""
734 734 # Use explicit file handle, if given.
735 735 if existingfp is not None:
736 736 yield existingfp
737 737
738 738 # Use a file handle being actively used for writes, if available.
739 739 # There is some danger to doing this because reads will seek the
740 740 # file. However, _writeentry() performs a SEEK_END before all writes,
741 741 # so we should be safe.
742 742 elif self._writinghandles:
743 743 if self._inline:
744 744 yield self._writinghandles[0]
745 745 else:
746 746 yield self._writinghandles[1]
747 747
748 748 # Otherwise open a new file handle.
749 749 else:
750 750 if self._inline:
751 751 func = self._indexfp
752 752 else:
753 753 func = self._datafp
754 754 with func() as fp:
755 755 yield fp
756 756
757 757 def tiprev(self):
758 758 return len(self.index) - 1
759 759
760 760 def tip(self):
761 761 return self.node(self.tiprev())
762 762
763 763 def __contains__(self, rev):
764 764 return 0 <= rev < len(self)
765 765
766 766 def __len__(self):
767 767 return len(self.index)
768 768
769 769 def __iter__(self):
770 770 return iter(pycompat.xrange(len(self)))
771 771
772 772 def revs(self, start=0, stop=None):
773 773 """iterate over all rev in this revlog (from start to stop)"""
774 774 return storageutil.iterrevs(len(self), start=start, stop=stop)
775 775
776 776 @property
777 777 def nodemap(self):
778 778 msg = (
779 779 b"revlog.nodemap is deprecated, "
780 780 b"use revlog.index.[has_node|rev|get_rev]"
781 781 )
782 782 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
783 783 return self.index.nodemap
784 784
785 785 @property
786 786 def _nodecache(self):
787 787 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
788 788 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
789 789 return self.index.nodemap
790 790
791 791 def hasnode(self, node):
792 792 try:
793 793 self.rev(node)
794 794 return True
795 795 except KeyError:
796 796 return False
797 797
798 798 def candelta(self, baserev, rev):
799 799 """whether two revisions (baserev, rev) can be delta-ed or not"""
800 800 # Disable delta if either rev requires a content-changing flag
801 801 # processor (ex. LFS). This is because such flag processor can alter
802 802 # the rawtext content that the delta will be based on, and two clients
803 803 # could have a same revlog node with different flags (i.e. different
804 804 # rawtext contents) and the delta could be incompatible.
805 805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
806 806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
807 807 ):
808 808 return False
809 809 return True
810 810
811 811 def update_caches(self, transaction):
812 812 if self.nodemap_file is not None:
813 813 if transaction is None:
814 814 nodemaputil.update_persistent_nodemap(self)
815 815 else:
816 816 nodemaputil.setup_persistent_nodemap(transaction, self)
817 817
818 818 def clearcaches(self):
819 819 self._revisioncache = None
820 820 self._chainbasecache.clear()
821 821 self._chunkcache = (0, b'')
822 822 self._pcache = {}
823 823 self._nodemap_docket = None
824 824 self.index.clearcaches()
825 825 # The python code is the one responsible for validating the docket, we
826 826 # end up having to refresh it here.
827 827 use_nodemap = (
828 828 not self._inline
829 829 and self.nodemap_file is not None
830 830 and util.safehasattr(self.index, 'update_nodemap_data')
831 831 )
832 832 if use_nodemap:
833 833 nodemap_data = nodemaputil.persisted_data(self)
834 834 if nodemap_data is not None:
835 835 self._nodemap_docket = nodemap_data[0]
836 836 self.index.update_nodemap_data(*nodemap_data)
837 837
838 838 def rev(self, node):
839 839 try:
840 840 return self.index.rev(node)
841 841 except TypeError:
842 842 raise
843 843 except error.RevlogError:
844 844 # parsers.c radix tree lookup failed
845 845 if node == wdirid or node in wdirfilenodeids:
846 846 raise error.WdirUnsupported
847 847 raise error.LookupError(node, self.indexfile, _(b'no node'))
848 848
849 849 # Accessors for index entries.
850 850
851 851 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
852 852 # are flags.
853 853 def start(self, rev):
854 854 return int(self.index[rev][0] >> 16)
855 855
856 856 def flags(self, rev):
857 857 return self.index[rev][0] & 0xFFFF
858 858
859 859 def length(self, rev):
860 860 return self.index[rev][1]
861 861
862 862 def sidedata_length(self, rev):
863 863 if self.version & 0xFFFF != REVLOGV2:
864 864 return 0
865 865 return self.index[rev][9]
866 866
867 867 def rawsize(self, rev):
868 868 """return the length of the uncompressed text for a given revision"""
869 869 l = self.index[rev][2]
870 870 if l >= 0:
871 871 return l
872 872
873 873 t = self.rawdata(rev)
874 874 return len(t)
875 875
876 876 def size(self, rev):
877 877 """length of non-raw text (processed by a "read" flag processor)"""
878 878 # fast path: if no "read" flag processor could change the content,
879 879 # size is rawsize. note: ELLIPSIS is known to not change the content.
880 880 flags = self.flags(rev)
881 881 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
882 882 return self.rawsize(rev)
883 883
884 884 return len(self.revision(rev, raw=False))
885 885
886 886 def chainbase(self, rev):
887 887 base = self._chainbasecache.get(rev)
888 888 if base is not None:
889 889 return base
890 890
891 891 index = self.index
892 892 iterrev = rev
893 893 base = index[iterrev][3]
894 894 while base != iterrev:
895 895 iterrev = base
896 896 base = index[iterrev][3]
897 897
898 898 self._chainbasecache[rev] = base
899 899 return base
900 900
901 901 def linkrev(self, rev):
902 902 return self.index[rev][4]
903 903
904 904 def parentrevs(self, rev):
905 905 try:
906 906 entry = self.index[rev]
907 907 except IndexError:
908 908 if rev == wdirrev:
909 909 raise error.WdirUnsupported
910 910 raise
911 911
912 912 return entry[5], entry[6]
913 913
914 914 # fast parentrevs(rev) where rev isn't filtered
915 915 _uncheckedparentrevs = parentrevs
916 916
917 917 def node(self, rev):
918 918 try:
919 919 return self.index[rev][7]
920 920 except IndexError:
921 921 if rev == wdirrev:
922 922 raise error.WdirUnsupported
923 923 raise
924 924
925 925 # Derived from index values.
926 926
927 927 def end(self, rev):
928 928 return self.start(rev) + self.length(rev)
929 929
930 930 def parents(self, node):
931 931 i = self.index
932 932 d = i[self.rev(node)]
933 933 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
934 934
935 935 def chainlen(self, rev):
936 936 return self._chaininfo(rev)[0]
937 937
938 938 def _chaininfo(self, rev):
939 939 chaininfocache = self._chaininfocache
940 940 if rev in chaininfocache:
941 941 return chaininfocache[rev]
942 942 index = self.index
943 943 generaldelta = self._generaldelta
944 944 iterrev = rev
945 945 e = index[iterrev]
946 946 clen = 0
947 947 compresseddeltalen = 0
948 948 while iterrev != e[3]:
949 949 clen += 1
950 950 compresseddeltalen += e[1]
951 951 if generaldelta:
952 952 iterrev = e[3]
953 953 else:
954 954 iterrev -= 1
955 955 if iterrev in chaininfocache:
956 956 t = chaininfocache[iterrev]
957 957 clen += t[0]
958 958 compresseddeltalen += t[1]
959 959 break
960 960 e = index[iterrev]
961 961 else:
962 962 # Add text length of base since decompressing that also takes
963 963 # work. For cache hits the length is already included.
964 964 compresseddeltalen += e[1]
965 965 r = (clen, compresseddeltalen)
966 966 chaininfocache[rev] = r
967 967 return r
968 968
969 969 def _deltachain(self, rev, stoprev=None):
970 970 """Obtain the delta chain for a revision.
971 971
972 972 ``stoprev`` specifies a revision to stop at. If not specified, we
973 973 stop at the base of the chain.
974 974
975 975 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
976 976 revs in ascending order and ``stopped`` is a bool indicating whether
977 977 ``stoprev`` was hit.
978 978 """
979 979 # Try C implementation.
980 980 try:
981 981 return self.index.deltachain(rev, stoprev, self._generaldelta)
982 982 except AttributeError:
983 983 pass
984 984
985 985 chain = []
986 986
987 987 # Alias to prevent attribute lookup in tight loop.
988 988 index = self.index
989 989 generaldelta = self._generaldelta
990 990
991 991 iterrev = rev
992 992 e = index[iterrev]
993 993 while iterrev != e[3] and iterrev != stoprev:
994 994 chain.append(iterrev)
995 995 if generaldelta:
996 996 iterrev = e[3]
997 997 else:
998 998 iterrev -= 1
999 999 e = index[iterrev]
1000 1000
1001 1001 if iterrev == stoprev:
1002 1002 stopped = True
1003 1003 else:
1004 1004 chain.append(iterrev)
1005 1005 stopped = False
1006 1006
1007 1007 chain.reverse()
1008 1008 return chain, stopped
1009 1009
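# A minimal sketch of the pure-python walk above, assuming a toy ``bases``
# list standing in for ``index[rev][3]`` (general-delta case, where the
# chain stops at a revision that is its own base):
#
#   >>> bases = [0, 0, 1, 2]  # rev 3 deltas against 2, 2 against 1, ...
#   >>> rev, chain = 3, []
#   >>> while rev != bases[rev]:
#   ...     chain.append(rev)
#   ...     rev = bases[rev]
#   >>> chain.append(rev)
#   >>> chain[::-1]  # ascending order, like ``chain.reverse()`` above
#   [0, 1, 2, 3]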
1010 1010 def ancestors(self, revs, stoprev=0, inclusive=False):
1011 1011 """Generate the ancestors of 'revs' in reverse revision order.
1012 1012 Does not generate revs lower than stoprev.
1013 1013
1014 1014 See the documentation for ancestor.lazyancestors for more details."""
1015 1015
1016 1016 # first, make sure start revisions aren't filtered
1017 1017 revs = list(revs)
1018 1018 checkrev = self.node
1019 1019 for r in revs:
1020 1020 checkrev(r)
1021 1021 # and then their ancestors are guaranteed to be unfiltered as well
1022 1022
1023 1023 if rustancestor is not None:
1024 1024 lazyancestors = rustancestor.LazyAncestors
1025 1025 arg = self.index
1026 1026 else:
1027 1027 lazyancestors = ancestor.lazyancestors
1028 1028 arg = self._uncheckedparentrevs
1029 1029 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1030 1030
1031 1031 def descendants(self, revs):
1032 1032 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1033 1033
1034 1034 def findcommonmissing(self, common=None, heads=None):
1035 1035 """Return a tuple of the ancestors of common and the ancestors of heads
1036 1036 that are not ancestors of common. In revset terminology, we return the
1037 1037 tuple:
1038 1038
1039 1039 ::common, (::heads) - (::common)
1040 1040
1041 1041 The list is sorted by revision number, meaning it is
1042 1042 topologically sorted.
1043 1043
1044 1044 'heads' and 'common' are both lists of node IDs. If heads is
1045 1045 not supplied, uses all of the revlog's heads. If common is not
1046 1046 supplied, uses nullid."""
1047 1047 if common is None:
1048 1048 common = [nullid]
1049 1049 if heads is None:
1050 1050 heads = self.heads()
1051 1051
1052 1052 common = [self.rev(n) for n in common]
1053 1053 heads = [self.rev(n) for n in heads]
1054 1054
1055 1055 # we want the ancestors, but inclusive
1056 1056 class lazyset(object):
1057 1057 def __init__(self, lazyvalues):
1058 1058 self.addedvalues = set()
1059 1059 self.lazyvalues = lazyvalues
1060 1060
1061 1061 def __contains__(self, value):
1062 1062 return value in self.addedvalues or value in self.lazyvalues
1063 1063
1064 1064 def __iter__(self):
1065 1065 added = self.addedvalues
1066 1066 for r in added:
1067 1067 yield r
1068 1068 for r in self.lazyvalues:
1069 1069 if r not in added:
1070 1070 yield r
1071 1071
1072 1072 def add(self, value):
1073 1073 self.addedvalues.add(value)
1074 1074
1075 1075 def update(self, values):
1076 1076 self.addedvalues.update(values)
1077 1077
1078 1078 has = lazyset(self.ancestors(common))
1079 1079 has.add(nullrev)
1080 1080 has.update(common)
1081 1081
1082 1082 # take all ancestors from heads that aren't in has
1083 1083 missing = set()
1084 1084 visit = collections.deque(r for r in heads if r not in has)
1085 1085 while visit:
1086 1086 r = visit.popleft()
1087 1087 if r in missing:
1088 1088 continue
1089 1089 else:
1090 1090 missing.add(r)
1091 1091 for p in self.parentrevs(r):
1092 1092 if p not in has:
1093 1093 visit.append(p)
1094 1094 missing = list(missing)
1095 1095 missing.sort()
1096 1096 return has, [self.node(miss) for miss in missing]
1097 1097
1098 1098 def incrementalmissingrevs(self, common=None):
1099 1099 """Return an object that can be used to incrementally compute the
1100 1100 revision numbers of the ancestors of arbitrary sets that are not
1101 1101 ancestors of common. This is an ancestor.incrementalmissingancestors
1102 1102 object.
1103 1103
1104 1104 'common' is a list of revision numbers. If common is not supplied, uses
1105 1105 nullrev.
1106 1106 """
1107 1107 if common is None:
1108 1108 common = [nullrev]
1109 1109
1110 1110 if rustancestor is not None:
1111 1111 return rustancestor.MissingAncestors(self.index, common)
1112 1112 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1113 1113
1114 1114 def findmissingrevs(self, common=None, heads=None):
1115 1115 """Return the revision numbers of the ancestors of heads that
1116 1116 are not ancestors of common.
1117 1117
1118 1118 More specifically, return a list of revision numbers corresponding to
1119 1119 nodes N such that every N satisfies the following constraints:
1120 1120
1121 1121 1. N is an ancestor of some node in 'heads'
1122 1122 2. N is not an ancestor of any node in 'common'
1123 1123
1124 1124 The list is sorted by revision number, meaning it is
1125 1125 topologically sorted.
1126 1126
1127 1127 'heads' and 'common' are both lists of revision numbers. If heads is
1128 1128 not supplied, uses all of the revlog's heads. If common is not
1129 1129 supplied, uses nullrev.
1130 1130 if common is None:
1131 1131 common = [nullrev]
1132 1132 if heads is None:
1133 1133 heads = self.headrevs()
1134 1134
1135 1135 inc = self.incrementalmissingrevs(common=common)
1136 1136 return inc.missingancestors(heads)
1137 1137
1138 1138 def findmissing(self, common=None, heads=None):
1139 1139 """Return the ancestors of heads that are not ancestors of common.
1140 1140
1141 1141 More specifically, return a list of nodes N such that every N
1142 1142 satisfies the following constraints:
1143 1143
1144 1144 1. N is an ancestor of some node in 'heads'
1145 1145 2. N is not an ancestor of any node in 'common'
1146 1146
1147 1147 The list is sorted by revision number, meaning it is
1148 1148 topologically sorted.
1149 1149
1150 1150 'heads' and 'common' are both lists of node IDs. If heads is
1151 1151 not supplied, uses all of the revlog's heads. If common is not
1152 1152 supplied, uses nullid."""
1153 1153 if common is None:
1154 1154 common = [nullid]
1155 1155 if heads is None:
1156 1156 heads = self.heads()
1157 1157
1158 1158 common = [self.rev(n) for n in common]
1159 1159 heads = [self.rev(n) for n in heads]
1160 1160
1161 1161 inc = self.incrementalmissingrevs(common=common)
1162 1162 return [self.node(r) for r in inc.missingancestors(heads)]
1163 1163
1164 1164 def nodesbetween(self, roots=None, heads=None):
1165 1165 """Return a topological path from 'roots' to 'heads'.
1166 1166
1167 1167 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1168 1168 topologically sorted list of all nodes N that satisfy both of
1169 1169 these constraints:
1170 1170
1171 1171 1. N is a descendant of some node in 'roots'
1172 1172 2. N is an ancestor of some node in 'heads'
1173 1173
1174 1174 Every node is considered to be both a descendant and an ancestor
1175 1175 of itself, so every reachable node in 'roots' and 'heads' will be
1176 1176 included in 'nodes'.
1177 1177
1178 1178 'outroots' is the list of reachable nodes in 'roots', i.e., the
1179 1179 subset of 'roots' that is returned in 'nodes'. Likewise,
1180 1180 'outheads' is the subset of 'heads' that is also in 'nodes'.
1181 1181
1182 1182 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1183 1183 unspecified, uses nullid as the only root. If 'heads' is
1184 1184 unspecified, uses list of all of the revlog's heads."""
1185 1185 nonodes = ([], [], [])
1186 1186 if roots is not None:
1187 1187 roots = list(roots)
1188 1188 if not roots:
1189 1189 return nonodes
1190 1190 lowestrev = min([self.rev(n) for n in roots])
1191 1191 else:
1192 1192 roots = [nullid] # Everybody's a descendant of nullid
1193 1193 lowestrev = nullrev
1194 1194 if (lowestrev == nullrev) and (heads is None):
1195 1195 # We want _all_ the nodes!
1196 1196 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1197 1197 if heads is None:
1198 1198 # All nodes are ancestors, so the latest ancestor is the last
1199 1199 # node.
1200 1200 highestrev = len(self) - 1
1201 1201 # Set ancestors to None to signal that every node is an ancestor.
1202 1202 ancestors = None
1203 1203 # Set heads to an empty dictionary for later discovery of heads
1204 1204 heads = {}
1205 1205 else:
1206 1206 heads = list(heads)
1207 1207 if not heads:
1208 1208 return nonodes
1209 1209 ancestors = set()
1210 1210 # Turn heads into a dictionary so we can remove 'fake' heads.
1211 1211 # Also, later we will be using it to filter out the heads we can't
1212 1212 # find from roots.
1213 1213 heads = dict.fromkeys(heads, False)
1214 1214 # Start at the top and keep marking parents until we're done.
1215 1215 nodestotag = set(heads)
1216 1216 # Remember where the top was so we can use it as a limit later.
1217 1217 highestrev = max([self.rev(n) for n in nodestotag])
1218 1218 while nodestotag:
1219 1219 # grab a node to tag
1220 1220 n = nodestotag.pop()
1221 1221 # Never tag nullid
1222 1222 if n == nullid:
1223 1223 continue
1224 1224 # A node's revision number represents its place in a
1225 1225 # topologically sorted list of nodes.
1226 1226 r = self.rev(n)
1227 1227 if r >= lowestrev:
1228 1228 if n not in ancestors:
1229 1229 # If we are possibly a descendant of one of the roots
1230 1230 # and we haven't already been marked as an ancestor
1231 1231 ancestors.add(n) # Mark as ancestor
1232 1232 # Add non-nullid parents to list of nodes to tag.
1233 1233 nodestotag.update(
1234 1234 [p for p in self.parents(n) if p != nullid]
1235 1235 )
1236 1236 elif n in heads: # We've seen it before, is it a fake head?
1237 1237 # So it is; real heads should not be ancestors of
1238 1238 # any other heads.
1239 1239 heads.pop(n)
1240 1240 if not ancestors:
1241 1241 return nonodes
1242 1242 # Now that we have our set of ancestors, we want to remove any
1243 1243 # roots that are not ancestors.
1244 1244
1245 1245 # If one of the roots was nullid, everything is included anyway.
1246 1246 if lowestrev > nullrev:
1247 1247 # But, since it wasn't, let's recompute the lowest rev to not
1248 1248 # include roots that aren't ancestors.
1249 1249
1250 1250 # Filter out roots that aren't ancestors of heads
1251 1251 roots = [root for root in roots if root in ancestors]
1252 1252 # Recompute the lowest revision
1253 1253 if roots:
1254 1254 lowestrev = min([self.rev(root) for root in roots])
1255 1255 else:
1256 1256 # No more roots? Return empty list
1257 1257 return nonodes
1258 1258 else:
1259 1259 # We are descending from nullid, and don't need to care about
1260 1260 # any other roots.
1261 1261 lowestrev = nullrev
1262 1262 roots = [nullid]
1263 1263 # Transform our roots list into a set.
1264 1264 descendants = set(roots)
1265 1265 # Also, keep the original roots so we can filter out roots that aren't
1266 1266 # 'real' roots (i.e. are descended from other roots).
1267 1267 roots = descendants.copy()
1268 1268 # Our topologically sorted list of output nodes.
1269 1269 orderedout = []
1270 1270 # Don't start at nullid since we don't want nullid in our output list,
1271 1271 # and if nullid shows up in descendants, empty parents will look like
1272 1272 # they're descendants.
1273 1273 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1274 1274 n = self.node(r)
1275 1275 isdescendant = False
1276 1276 if lowestrev == nullrev: # Everybody is a descendant of nullid
1277 1277 isdescendant = True
1278 1278 elif n in descendants:
1279 1279 # n is already a descendant
1280 1280 isdescendant = True
1281 1281 # This check only needs to be done here because all the roots
1282 1282 # will start out marked as descendants before the loop.
1283 1283 if n in roots:
1284 1284 # If n was a root, check if it's a 'real' root.
1285 1285 p = tuple(self.parents(n))
1286 1286 # If any of its parents are descendants, it's not a root.
1287 1287 if (p[0] in descendants) or (p[1] in descendants):
1288 1288 roots.remove(n)
1289 1289 else:
1290 1290 p = tuple(self.parents(n))
1291 1291 # A node is a descendant if either of its parents is a
1292 1292 # descendant. (We seeded the descendants set with the roots
1293 1293 # up there, remember?)
1294 1294 if (p[0] in descendants) or (p[1] in descendants):
1295 1295 descendants.add(n)
1296 1296 isdescendant = True
1297 1297 if isdescendant and ((ancestors is None) or (n in ancestors)):
1298 1298 # Only include nodes that are both descendants and ancestors.
1299 1299 orderedout.append(n)
1300 1300 if (ancestors is not None) and (n in heads):
1301 1301 # We're trying to figure out which heads are reachable
1302 1302 # from roots.
1303 1303 # Mark this head as having been reached
1304 1304 heads[n] = True
1305 1305 elif ancestors is None:
1306 1306 # Otherwise, we're trying to discover the heads.
1307 1307 # Assume this is a head because if it isn't, the next step
1308 1308 # will eventually remove it.
1309 1309 heads[n] = True
1310 1310 # But, obviously its parents aren't.
1311 1311 for p in self.parents(n):
1312 1312 heads.pop(p, None)
1313 1313 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1314 1314 roots = list(roots)
1315 1315 assert orderedout
1316 1316 assert roots
1317 1317 assert heads
1318 1318 return (orderedout, roots, heads)
1319 1319
1320 1320 def headrevs(self, revs=None):
1321 1321 if revs is None:
1322 1322 try:
1323 1323 return self.index.headrevs()
1324 1324 except AttributeError:
1325 1325 return self._headrevs()
1326 1326 if rustdagop is not None:
1327 1327 return rustdagop.headrevs(self.index, revs)
1328 1328 return dagop.headrevs(revs, self._uncheckedparentrevs)
1329 1329
1330 1330 def computephases(self, roots):
1331 1331 return self.index.computephasesmapsets(roots)
1332 1332
1333 1333 def _headrevs(self):
1334 1334 count = len(self)
1335 1335 if not count:
1336 1336 return [nullrev]
1337 1337 # we won't iterate over filtered revs, so no rev is a head at start
1338 1338 ishead = [0] * (count + 1)
1339 1339 index = self.index
1340 1340 for r in self:
1341 1341 ishead[r] = 1 # I may be a head
1342 1342 e = index[r]
1343 1343 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1344 1344 return [r for r, val in enumerate(ishead) if val]
1345 1345
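# Worked example for ``_headrevs`` (hypothetical 4-rev DAG): with parent
# pairs [(-1, -1), (0, -1), (1, -1), (1, -1)], revs 2 and 3 both descend
# from rev 1, so only they survive as heads. The ``count + 1`` sizing
# gives nullrev (-1) a scratch slot at the end of the list:
#
#   >>> parents = [(-1, -1), (0, -1), (1, -1), (1, -1)]
#   >>> ishead = [0] * (len(parents) + 1)
#   >>> for r, (p1, p2) in enumerate(parents):
#   ...     ishead[r] = 1
#   ...     ishead[p1] = ishead[p2] = 0
#   >>> [r for r, v in enumerate(ishead[: len(parents)]) if v]
#   [2, 3]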
1346 1346 def heads(self, start=None, stop=None):
1347 1347 """return the list of all nodes that have no children
1348 1348
1349 1349 if start is specified, only heads that are descendants of
1350 1350 start will be returned
1351 1351 if stop is specified, the revs in stop are treated as if they
1352 1352 had no children
1353 1353 """
1354 1354 if start is None and stop is None:
1355 1355 if not len(self):
1356 1356 return [nullid]
1357 1357 return [self.node(r) for r in self.headrevs()]
1358 1358
1359 1359 if start is None:
1360 1360 start = nullrev
1361 1361 else:
1362 1362 start = self.rev(start)
1363 1363
1364 1364 stoprevs = {self.rev(n) for n in stop or []}
1365 1365
1366 1366 revs = dagop.headrevssubset(
1367 1367 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1368 1368 )
1369 1369
1370 1370 return [self.node(rev) for rev in revs]
1371 1371
1372 1372 def children(self, node):
1373 1373 """find the children of a given node"""
1374 1374 c = []
1375 1375 p = self.rev(node)
1376 1376 for r in self.revs(start=p + 1):
1377 1377 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1378 1378 if prevs:
1379 1379 for pr in prevs:
1380 1380 if pr == p:
1381 1381 c.append(self.node(r))
1382 1382 elif p == nullrev:
1383 1383 c.append(self.node(r))
1384 1384 return c
1385 1385
1386 1386 def commonancestorsheads(self, a, b):
1387 1387 """calculate all the heads of the common ancestors of nodes a and b"""
1388 1388 a, b = self.rev(a), self.rev(b)
1389 1389 ancs = self._commonancestorsheads(a, b)
1390 1390 return pycompat.maplist(self.node, ancs)
1391 1391
1392 1392 def _commonancestorsheads(self, *revs):
1393 1393 """calculate all the heads of the common ancestors of revs"""
1394 1394 try:
1395 1395 ancs = self.index.commonancestorsheads(*revs)
1396 1396 except (AttributeError, OverflowError): # C implementation failed
1397 1397 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1398 1398 return ancs
1399 1399
1400 1400 def isancestor(self, a, b):
1401 1401 """return True if node a is an ancestor of node b
1402 1402
1403 1403 A revision is considered an ancestor of itself."""
1404 1404 a, b = self.rev(a), self.rev(b)
1405 1405 return self.isancestorrev(a, b)
1406 1406
1407 1407 def isancestorrev(self, a, b):
1408 1408 """return True if revision a is an ancestor of revision b
1409 1409
1410 1410 A revision is considered an ancestor of itself.
1411 1411
1412 1412 The implementation of this is trivial but the use of
1413 1413 reachableroots is not."""
1414 1414 if a == nullrev:
1415 1415 return True
1416 1416 elif a == b:
1417 1417 return True
1418 1418 elif a > b:
1419 1419 return False
1420 1420 return bool(self.reachableroots(a, [b], [a], includepath=False))
1421 1421
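# In other words: with roots=[a] and heads=[b], ``reachableroots`` returns
# [a] exactly when ``a`` is reachable from ``b``, and an empty list
# otherwise, so the truthiness test above is all ``isancestorrev`` needs.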
1422 1422 def reachableroots(self, minroot, heads, roots, includepath=False):
1423 1423 """return (heads(::(<roots> and <roots>::<heads>)))
1424 1424
1425 1425 If includepath is True, return (<roots>::<heads>)."""
1426 1426 try:
1427 1427 return self.index.reachableroots2(
1428 1428 minroot, heads, roots, includepath
1429 1429 )
1430 1430 except AttributeError:
1431 1431 return dagop._reachablerootspure(
1432 1432 self.parentrevs, minroot, roots, heads, includepath
1433 1433 )
1434 1434
1435 1435 def ancestor(self, a, b):
1436 1436 """calculate the "best" common ancestor of nodes a and b"""
1437 1437
1438 1438 a, b = self.rev(a), self.rev(b)
1439 1439 try:
1440 1440 ancs = self.index.ancestors(a, b)
1441 1441 except (AttributeError, OverflowError):
1442 1442 ancs = ancestor.ancestors(self.parentrevs, a, b)
1443 1443 if ancs:
1444 1444 # choose a consistent winner when there's a tie
1445 1445 return min(map(self.node, ancs))
1446 1446 return nullid
1447 1447
1448 1448 def _match(self, id):
1449 1449 if isinstance(id, int):
1450 1450 # rev
1451 1451 return self.node(id)
1452 1452 if len(id) == 20:
1453 1453 # possibly a binary node
1454 1454 # odds of a binary node being all hex in ASCII are 1 in 10**25
1455 1455 try:
1456 1456 node = id
1457 1457 self.rev(node) # quick search the index
1458 1458 return node
1459 1459 except error.LookupError:
1460 1460 pass # may be partial hex id
1461 1461 try:
1462 1462 # str(rev)
1463 1463 rev = int(id)
1464 1464 if b"%d" % rev != id:
1465 1465 raise ValueError
1466 1466 if rev < 0:
1467 1467 rev = len(self) + rev
1468 1468 if rev < 0 or rev >= len(self):
1469 1469 raise ValueError
1470 1470 return self.node(rev)
1471 1471 except (ValueError, OverflowError):
1472 1472 pass
1473 1473 if len(id) == 40:
1474 1474 try:
1475 1475 # a full hex nodeid?
1476 1476 node = bin(id)
1477 1477 self.rev(node)
1478 1478 return node
1479 1479 except (TypeError, error.LookupError):
1480 1480 pass
1481 1481
1482 1482 def _partialmatch(self, id):
1483 1483 # we don't care about wdirfilenodeids as they should always be full hashes
1484 1484 maybewdir = wdirhex.startswith(id)
1485 1485 try:
1486 1486 partial = self.index.partialmatch(id)
1487 1487 if partial and self.hasnode(partial):
1488 1488 if maybewdir:
1489 1489 # single 'ff...' match in radix tree, ambiguous with wdir
1490 1490 raise error.RevlogError
1491 1491 return partial
1492 1492 if maybewdir:
1493 1493 # no 'ff...' match in radix tree, wdir identified
1494 1494 raise error.WdirUnsupported
1495 1495 return None
1496 1496 except error.RevlogError:
1497 1497 # parsers.c radix tree lookup gave multiple matches
1498 1498 # fast path: for unfiltered changelog, radix tree is accurate
1499 1499 if not getattr(self, 'filteredrevs', None):
1500 1500 raise error.AmbiguousPrefixLookupError(
1501 1501 id, self.indexfile, _(b'ambiguous identifier')
1502 1502 )
1503 1503 # fall through to slow path that filters hidden revisions
1504 1504 except (AttributeError, ValueError):
1505 1505 # we are pure python, or key was too short to search radix tree
1506 1506 pass
1507 1507
1508 1508 if id in self._pcache:
1509 1509 return self._pcache[id]
1510 1510
1511 1511 if len(id) <= 40:
1512 1512 try:
1513 1513 # hex(node)[:...]
1514 1514 l = len(id) // 2 # grab an even number of digits
1515 1515 prefix = bin(id[: l * 2])
1516 1516 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1517 1517 nl = [
1518 1518 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1519 1519 ]
1520 1520 if nullhex.startswith(id):
1521 1521 nl.append(nullid)
1522 1522 if len(nl) > 0:
1523 1523 if len(nl) == 1 and not maybewdir:
1524 1524 self._pcache[id] = nl[0]
1525 1525 return nl[0]
1526 1526 raise error.AmbiguousPrefixLookupError(
1527 1527 id, self.indexfile, _(b'ambiguous identifier')
1528 1528 )
1529 1529 if maybewdir:
1530 1530 raise error.WdirUnsupported
1531 1531 return None
1532 1532 except TypeError:
1533 1533 pass
1534 1534
1535 1535 def lookup(self, id):
1536 1536 """locate a node based on:
1537 1537 - revision number or str(revision number)
1538 1538 - nodeid or subset of hex nodeid
1539 1539 """
1540 1540 n = self._match(id)
1541 1541 if n is not None:
1542 1542 return n
1543 1543 n = self._partialmatch(id)
1544 1544 if n:
1545 1545 return n
1546 1546
1547 1547 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1548 1548
1549 1549 def shortest(self, node, minlength=1):
1550 1550 """Find the shortest unambiguous prefix that matches node."""
1551 1551
1552 1552 def isvalid(prefix):
1553 1553 try:
1554 1554 matchednode = self._partialmatch(prefix)
1555 1555 except error.AmbiguousPrefixLookupError:
1556 1556 return False
1557 1557 except error.WdirUnsupported:
1558 1558 # single 'ff...' match
1559 1559 return True
1560 1560 if matchednode is None:
1561 1561 raise error.LookupError(node, self.indexfile, _(b'no node'))
1562 1562 return True
1563 1563
1564 1564 def maybewdir(prefix):
1565 1565 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1566 1566
1567 1567 hexnode = hex(node)
1568 1568
1569 1569 def disambiguate(hexnode, minlength):
1570 1570 """Disambiguate against wdirid."""
1571 1571 for length in range(minlength, len(hexnode) + 1):
1572 1572 prefix = hexnode[:length]
1573 1573 if not maybewdir(prefix):
1574 1574 return prefix
1575 1575
1576 1576 if not getattr(self, 'filteredrevs', None):
1577 1577 try:
1578 1578 length = max(self.index.shortest(node), minlength)
1579 1579 return disambiguate(hexnode, length)
1580 1580 except error.RevlogError:
1581 1581 if node != wdirid:
1582 1582 raise error.LookupError(node, self.indexfile, _(b'no node'))
1583 1583 except AttributeError:
1584 1584 # Fall through to pure code
1585 1585 pass
1586 1586
1587 1587 if node == wdirid:
1588 1588 for length in range(minlength, len(hexnode) + 1):
1589 1589 prefix = hexnode[:length]
1590 1590 if isvalid(prefix):
1591 1591 return prefix
1592 1592
1593 1593 for length in range(minlength, len(hexnode) + 1):
1594 1594 prefix = hexnode[:length]
1595 1595 if isvalid(prefix):
1596 1596 return disambiguate(hexnode, length)
1597 1597
1598 1598 def cmp(self, node, text):
1599 1599 """compare text with a given file revision
1600 1600
1601 1601 returns True if text is different from what is stored.
1602 1602 """
1603 1603 p1, p2 = self.parents(node)
1604 1604 return storageutil.hashrevisionsha1(text, p1, p2) != node
1605 1605
1606 1606 def _cachesegment(self, offset, data):
1607 1607 """Add a segment to the revlog cache.
1608 1608
1609 1609 Accepts an absolute offset and the data that is at that location.
1610 1610 """
1611 1611 o, d = self._chunkcache
1612 1612 # try to add to existing cache
1613 1613 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1614 1614 self._chunkcache = o, d + data
1615 1615 else:
1616 1616 self._chunkcache = offset, data
1617 1617
1618 1618 def _readsegment(self, offset, length, df=None):
1619 1619 """Load a segment of raw data from the revlog.
1620 1620
1621 1621 Accepts an absolute offset, length to read, and an optional existing
1622 1622 file handle to read from.
1623 1623
1624 1624 If an existing file handle is passed, it will be seeked and the
1625 1625 original seek position will NOT be restored.
1626 1626
1627 1627 Returns a str or buffer of raw byte data.
1628 1628
1629 1629 Raises if the requested number of bytes could not be read.
1630 1630 """
1631 1631 # Cache data both forward and backward around the requested
1632 1632 # data, in a fixed size window. This helps speed up operations
1633 1633 # involving reading the revlog backwards.
1634 1634 cachesize = self._chunkcachesize
1635 1635 realoffset = offset & ~(cachesize - 1)
1636 1636 reallength = (
1637 1637 (offset + length + cachesize) & ~(cachesize - 1)
1638 1638 ) - realoffset
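# For instance, assuming a 65536-byte cache window (a typical default):
# a 100-byte read at offset 70000 is widened to the surrounding
# aligned window.
#
#   >>> cachesize = 65536
#   >>> offset, length = 70000, 100
#   >>> realoffset = offset & ~(cachesize - 1)
#   >>> realoffset
#   65536
#   >>> ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
#   65536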
1639 1639 with self._datareadfp(df) as df:
1640 1640 df.seek(realoffset)
1641 1641 d = df.read(reallength)
1642 1642
1643 1643 self._cachesegment(realoffset, d)
1644 1644 if offset != realoffset or reallength != length:
1645 1645 startoffset = offset - realoffset
1646 1646 if len(d) - startoffset < length:
1647 1647 raise error.RevlogError(
1648 1648 _(
1649 1649 b'partial read of revlog %s; expected %d bytes from '
1650 1650 b'offset %d, got %d'
1651 1651 )
1652 1652 % (
1653 1653 self.indexfile if self._inline else self.datafile,
1654 1654 length,
1655 1655 realoffset,
1656 1656 len(d) - startoffset,
1657 1657 )
1658 1658 )
1659 1659
1660 1660 return util.buffer(d, startoffset, length)
1661 1661
1662 1662 if len(d) < length:
1663 1663 raise error.RevlogError(
1664 1664 _(
1665 1665 b'partial read of revlog %s; expected %d bytes from offset '
1666 1666 b'%d, got %d'
1667 1667 )
1668 1668 % (
1669 1669 self.indexfile if self._inline else self.datafile,
1670 1670 length,
1671 1671 offset,
1672 1672 len(d),
1673 1673 )
1674 1674 )
1675 1675
1676 1676 return d
1677 1677
1678 1678 def _getsegment(self, offset, length, df=None):
1679 1679 """Obtain a segment of raw data from the revlog.
1680 1680
1681 1681 Accepts an absolute offset, length of bytes to obtain, and an
1682 1682 optional file handle to the already-opened revlog. If the file
1683 1683 handle is used, its original seek position will not be preserved.
1684 1684
1685 1685 Requests for data may be returned from a cache.
1686 1686
1687 1687 Returns a str or a buffer instance of raw byte data.
1688 1688 """
1689 1689 o, d = self._chunkcache
1690 1690 l = len(d)
1691 1691
1692 1692 # is it in the cache?
1693 1693 cachestart = offset - o
1694 1694 cacheend = cachestart + length
1695 1695 if cachestart >= 0 and cacheend <= l:
1696 1696 if cachestart == 0 and cacheend == l:
1697 1697 return d # avoid a copy
1698 1698 return util.buffer(d, cachestart, cacheend - cachestart)
1699 1699
1700 1700 return self._readsegment(offset, length, df=df)
1701 1701
1702 1702 def _getsegmentforrevs(self, startrev, endrev, df=None):
1703 1703 """Obtain a segment of raw data corresponding to a range of revisions.
1704 1704
1705 1705 Accepts the start and end revisions and an optional already-open
1706 1706 file handle to be used for reading. If the file handle is used, its
1707 1707 seek position will not be preserved.
1708 1708
1709 1709 Requests for data may be satisfied by a cache.
1710 1710
1711 1711 Returns a 2-tuple of (offset, data) for the requested range of
1712 1712 revisions. Offset is the integer offset from the beginning of the
1713 1713 revlog and data is a str or buffer of the raw byte data.
1714 1714
1715 1715 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1716 1716 to determine where each revision's data begins and ends.
1717 1717 """
1718 1718 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1719 1719 # (functions are expensive).
1720 1720 index = self.index
1721 1721 istart = index[startrev]
1722 1722 start = int(istart[0] >> 16)
1723 1723 if startrev == endrev:
1724 1724 end = start + istart[1]
1725 1725 else:
1726 1726 iend = index[endrev]
1727 1727 end = int(iend[0] >> 16) + iend[1]
1728 1728
1729 1729 if self._inline:
1730 1730 start += (startrev + 1) * self._io.size
1731 1731 end += (endrev + 1) * self._io.size
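# In an inline revlog, each revision's data is stored in the index
# file right after its fixed-size index entry, so rev r's data starts
# after (r + 1) entries; the two corrections above account for that.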
1732 1732 length = end - start
1733 1733
1734 1734 return start, self._getsegment(start, length, df=df)
1735 1735
1736 1736 def _chunk(self, rev, df=None):
1737 1737 """Obtain a single decompressed chunk for a revision.
1738 1738
1739 1739 Accepts an integer revision and an optional already-open file handle
1740 1740 to be used for reading. If used, the seek position of the file will not
1741 1741 be preserved.
1742 1742
1743 1743 Returns a str holding uncompressed data for the requested revision.
1744 1744 """
1745 1745 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1746 1746
1747 1747 def _chunks(self, revs, df=None, targetsize=None):
1748 1748 """Obtain decompressed chunks for the specified revisions.
1749 1749
1750 1750 Accepts an iterable of numeric revisions that are assumed to be in
1751 1751 ascending order. Also accepts an optional already-open file handle
1752 1752 to be used for reading. If used, the seek position of the file will
1753 1753 not be preserved.
1754 1754
1755 1755 This function is similar to calling ``self._chunk()`` multiple times,
1756 1756 but is faster.
1757 1757
1758 1758 Returns a list with decompressed data for each requested revision.
1759 1759 """
1760 1760 if not revs:
1761 1761 return []
1762 1762 start = self.start
1763 1763 length = self.length
1764 1764 inline = self._inline
1765 1765 iosize = self._io.size
1766 1766 buffer = util.buffer
1767 1767
1768 1768 l = []
1769 1769 ladd = l.append
1770 1770
1771 1771 if not self._withsparseread:
1772 1772 slicedchunks = (revs,)
1773 1773 else:
1774 1774 slicedchunks = deltautil.slicechunk(
1775 1775 self, revs, targetsize=targetsize
1776 1776 )
1777 1777
1778 1778 for revschunk in slicedchunks:
1779 1779 firstrev = revschunk[0]
1780 1780 # Skip trailing revisions with empty diff
1781 1781 for lastrev in revschunk[::-1]:
1782 1782 if length(lastrev) != 0:
1783 1783 break
1784 1784
1785 1785 try:
1786 1786 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1787 1787 except OverflowError:
1788 1788 # issue4215 - we can't cache a run of chunks greater than
1789 1789 # 2G on Windows
1790 1790 return [self._chunk(rev, df=df) for rev in revschunk]
1791 1791
1792 1792 decomp = self.decompress
1793 1793 for rev in revschunk:
1794 1794 chunkstart = start(rev)
1795 1795 if inline:
1796 1796 chunkstart += (rev + 1) * iosize
1797 1797 chunklength = length(rev)
1798 1798 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1799 1799
1800 1800 return l
1801 1801
1802 1802 def _chunkclear(self):
1803 1803 """Clear the raw chunk cache."""
1804 1804 self._chunkcache = (0, b'')
1805 1805
1806 1806 def deltaparent(self, rev):
1807 1807 """return deltaparent of the given revision"""
1808 1808 base = self.index[rev][3]
1809 1809 if base == rev:
1810 1810 return nullrev
1811 1811 elif self._generaldelta:
1812 1812 return base
1813 1813 else:
1814 1814 return rev - 1
1815 1815
1816 1816 def issnapshot(self, rev):
1817 1817 """tells whether rev is a snapshot"""
1818 1818 if not self._sparserevlog:
1819 1819 return self.deltaparent(rev) == nullrev
1820 1820 elif util.safehasattr(self.index, b'issnapshot'):
1821 1821 # directly assign the method to cache the testing and access
1822 1822 self.issnapshot = self.index.issnapshot
1823 1823 return self.issnapshot(rev)
1824 1824 if rev == nullrev:
1825 1825 return True
1826 1826 entry = self.index[rev]
1827 1827 base = entry[3]
1828 1828 if base == rev:
1829 1829 return True
1830 1830 if base == nullrev:
1831 1831 return True
1832 1832 p1 = entry[5]
1833 1833 p2 = entry[6]
1834 1834 if base == p1 or base == p2:
1835 1835 return False
1836 1836 return self.issnapshot(base)
1837 1837
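# To summarize the fallback above: a revision is a snapshot if it is
# stored as a full text (its base is itself or nullrev), or if it is an
# "intermediate" snapshot, i.e. a delta stored against something other
# than one of its parents that is itself a snapshot; hence the recursion
# on ``base``.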
1838 1838 def snapshotdepth(self, rev):
1839 1839 """number of snapshot in the chain before this one"""
1840 1840 if not self.issnapshot(rev):
1841 1841 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1842 1842 return len(self._deltachain(rev)[0]) - 1
1843 1843
1844 1844 def revdiff(self, rev1, rev2):
1845 1845 """return or calculate a delta between two revisions
1846 1846
1847 1847 The delta calculated is in binary form and is intended to be written to
1848 1848 revlog data directly. So this function needs raw revision data.
1849 1849 """
1850 1850 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1851 1851 return bytes(self._chunk(rev2))
1852 1852
1853 1853 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1854 1854
1855 1855 def _processflags(self, text, flags, operation, raw=False):
1856 1856 """deprecated entry point to access flag processors"""
1857 1857 msg = b'_processflag(...) use the specialized variant'
1858 1858 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1859 1859 if raw:
1860 1860 return text, flagutil.processflagsraw(self, text, flags)
1861 1861 elif operation == b'read':
1862 1862 return flagutil.processflagsread(self, text, flags)
1863 1863 else: # write operation
1864 1864 return flagutil.processflagswrite(self, text, flags)
1865 1865
1866 1866 def revision(self, nodeorrev, _df=None, raw=False):
1867 1867 """return an uncompressed revision of a given node or revision
1868 1868 number.
1869 1869
1870 1870 _df - an existing file handle to read from. (internal-only)
1871 1871 raw - an optional argument specifying if the revision data is to be
1872 1872 treated as raw data when applying flag transforms. 'raw' should be set
1873 1873 to True when generating changegroups or in debug commands.
1874 1874 """
1875 1875 if raw:
1876 1876 msg = (
1877 1877 b'revlog.revision(..., raw=True) is deprecated, '
1878 1878 b'use revlog.rawdata(...)'
1879 1879 )
1880 1880 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1881 1881 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1882 1882
1883 1883 def sidedata(self, nodeorrev, _df=None):
1884 1884 """a map of extra data related to the changeset but not part of the hash
1885 1885
1886 1886 This function currently returns a dictionary. However, a more
1887 1887 advanced mapping object will likely be used in the future for
1888 1888 more efficient/lazy code.
1889 1889 """
1890 1890 return self._revisiondata(nodeorrev, _df)[1]
1891 1891
1892 1892 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1893 1893 # deal with <nodeorrev> argument type
1894 1894 if isinstance(nodeorrev, int):
1895 1895 rev = nodeorrev
1896 1896 node = self.node(rev)
1897 1897 else:
1898 1898 node = nodeorrev
1899 1899 rev = None
1900 1900
1901 1901 # fast path the special `nullid` rev
1902 1902 if node == nullid:
1903 1903 return b"", {}
1904 1904
1905 1905 # ``rawtext`` is the text as stored inside the revlog. Might be the
1906 1906 # revision or might need to be processed to retrieve the revision.
1907 1907 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1908 1908
1909 1909 if self.version & 0xFFFF == REVLOGV2:
1910 1910 if rev is None:
1911 1911 rev = self.rev(node)
1912 1912 sidedata = self._sidedata(rev)
1913 1913 else:
1914 1914 sidedata = {}
1915 1915
1916 1916 if raw and validated:
1917 1917 # if we don't want to process the raw text and the raw
1918 1918 # text is cached, we can exit early.
1919 1919 return rawtext, sidedata
1920 1920 if rev is None:
1921 1921 rev = self.rev(node)
1922 1922 # the revlog's flags for this revision
1923 1923 # (they usually alter its state or content)
1924 1924 flags = self.flags(rev)
1925 1925
1926 1926 if validated and flags == REVIDX_DEFAULT_FLAGS:
1927 1927 # no extra flags set, no flag processor runs, text = rawtext
1928 1928 return rawtext, sidedata
1929 1929
1930 1930 if raw:
1931 1931 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1932 1932 text = rawtext
1933 1933 else:
1934 1934 r = flagutil.processflagsread(self, rawtext, flags)
1935 1935 text, validatehash = r
1936 1936 if validatehash:
1937 1937 self.checkhash(text, node, rev=rev)
1938 1938 if not validated:
1939 1939 self._revisioncache = (node, rev, rawtext)
1940 1940
1941 1941 return text, sidedata
1942 1942
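# The read path in short: ``_rawtext`` resolves the stored text through
# the delta chain (possibly reusing the one-entry revision cache),
# ``_sidedata`` fetches the REVLOGV2 sidedata separately, and the flag
# processors then turn rawtext into text and report whether the node
# hash still needs checking.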
1943 1943 def _rawtext(self, node, rev, _df=None):
1944 1944 """return the possibly unvalidated rawtext for a revision
1945 1945
1946 1946 returns (rev, rawtext, validated)
1947 1947 """
1948 1948
1949 1949 # revision in the cache (could be useful to apply delta)
1950 1950 cachedrev = None
1951 1951 # An intermediate text to apply deltas to
1952 1952 basetext = None
1953 1953
1954 1954 # Check if we have the entry in cache
1955 1955 # The cache entry looks like (node, rev, rawtext)
1956 1956 if self._revisioncache:
1957 1957 if self._revisioncache[0] == node:
1958 1958 return (rev, self._revisioncache[2], True)
1959 1959 cachedrev = self._revisioncache[1]
1960 1960
1961 1961 if rev is None:
1962 1962 rev = self.rev(node)
1963 1963
1964 1964 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1965 1965 if stopped:
1966 1966 basetext = self._revisioncache[2]
1967 1967
1968 1968 # drop cache to save memory; the caller is expected to
1969 1969 # update self._revisioncache after validating the text
1970 1970 self._revisioncache = None
1971 1971
1972 1972 targetsize = None
1973 1973 rawsize = self.index[rev][2]
1974 1974 if 0 <= rawsize:
1975 1975 targetsize = 4 * rawsize
1976 1976
1977 1977 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1978 1978 if basetext is None:
1979 1979 basetext = bytes(bins[0])
1980 1980 bins = bins[1:]
1981 1981
1982 1982 rawtext = mdiff.patches(basetext, bins)
1983 1983 del basetext # let us have a chance to free memory early
1984 1984 return (rev, rawtext, False)
1985 1985
1986 1986 def _sidedata(self, rev):
1987 1987 """Return the sidedata for a given revision number."""
1988 1988 index_entry = self.index[rev]
1989 1989 sidedata_offset = index_entry[8]
1990 1990 sidedata_size = index_entry[9]
1991 1991
1992 1992 if self._inline:
1993 1993 sidedata_offset += self._io.size * (1 + rev)
1994 1994 if sidedata_size == 0:
1995 1995 return {}
1996 1996
1997 1997 segment = self._getsegment(sidedata_offset, sidedata_size)
1998 1998 sidedata = sidedatautil.deserialize_sidedata(segment)
1999 1999 return sidedata
2000 2000
2001 2001 def rawdata(self, nodeorrev, _df=None):
2002 2002 """return an uncompressed raw data of a given node or revision number.
2003 2003
2004 2004 _df - an existing file handle to read from. (internal-only)
2005 2005 """
2006 2006 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2007 2007
2008 2008 def hash(self, text, p1, p2):
2009 2009 """Compute a node hash.
2010 2010
2011 2011 Available as a function so that subclasses can replace the hash
2012 2012 as needed.
2013 2013 """
2014 2014 return storageutil.hashrevisionsha1(text, p1, p2)
2015 2015
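# Semantically (a rough, hedged equivalent of
# ``storageutil.hashrevisionsha1``, not its exact implementation): the
# node is the SHA-1 of the two parent nodes in sorted order followed by
# the text.
#
#   >>> import hashlib
#   >>> def samplehash(text, p1, p2):
#   ...     s = hashlib.sha1(min(p1, p2) + max(p1, p2))
#   ...     s.update(text)
#   ...     return s.digest()
#   >>> len(samplehash(b'data', b'\x00' * 20, b'\x00' * 20))
#   20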
2016 2016 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2017 2017 """Check node hash integrity.
2018 2018
2019 2019 Available as a function so that subclasses can extend hash mismatch
2020 2020 behaviors as needed.
2021 2021 """
2022 2022 try:
2023 2023 if p1 is None and p2 is None:
2024 2024 p1, p2 = self.parents(node)
2025 2025 if node != self.hash(text, p1, p2):
2026 2026 # Clear the revision cache on hash failure. The revision cache
2027 2027 # only stores the raw revision and clearing the cache does have
2028 2028 # the side-effect that we won't have a cache hit when the raw
2029 2029 # revision data is accessed. But this case should be rare and
2030 2030 # it is extra work to teach the cache about the hash
2031 2031 # verification state.
2032 2032 if self._revisioncache and self._revisioncache[0] == node:
2033 2033 self._revisioncache = None
2034 2034
2035 2035 revornode = rev
2036 2036 if revornode is None:
2037 2037 revornode = templatefilters.short(hex(node))
2038 2038 raise error.RevlogError(
2039 2039 _(b"integrity check failed on %s:%s")
2040 2040 % (self.indexfile, pycompat.bytestr(revornode))
2041 2041 )
2042 2042 except error.RevlogError:
2043 2043 if self._censorable and storageutil.iscensoredtext(text):
2044 2044 raise error.CensoredNodeError(self.indexfile, node, text)
2045 2045 raise
2046 2046
2047 2047 def _enforceinlinesize(self, tr, fp=None):
2048 2048 """Check if the revlog is too big for inline and convert if so.
2049 2049
2050 2050 This should be called after revisions are added to the revlog. If the
2051 2051 revlog has grown too large to be an inline revlog, it will convert it
2052 2052 to use multiple index and data files.
2053 2053 """
2054 2054 tiprev = len(self) - 1
2055 2055 if (
2056 2056 not self._inline
2057 2057 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2058 2058 ):
2059 2059 return
2060 2060
2061 2061 troffset = tr.findoffset(self.indexfile)
2062 2062 if troffset is None:
2063 2063 raise error.RevlogError(
2064 2064 _(b"%s not found in the transaction") % self.indexfile
2065 2065 )
2066 2066 trindex = 0
2067 2067 tr.add(self.datafile, 0)
2068 2068
2069 2069 if fp:
2070 2070 fp.flush()
2071 2071 fp.close()
2072 2072 # We can't use the cached file handle after close(). So prevent
2073 2073 # its usage.
2074 2074 self._writinghandles = None
2075 2075
2076 2076 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2077 2077 for r in self:
2078 2078 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2079 2079 if troffset <= self.start(r):
2080 2080 trindex = r
2081 2081
2082 2082 with self._indexfp(b'w') as fp:
2083 2083 self.version &= ~FLAG_INLINE_DATA
2084 2084 self._inline = False
2085 2085 io = self._io
2086 2086 for i in self:
2087 2087 e = io.packentry(self.index[i], self.node, self.version, i)
2088 2088 fp.write(e)
2089 2089
2090 2090 # the temp file replaces the real index when we exit the context
2091 2091 # manager
2092 2092
2093 2093 tr.replace(self.indexfile, trindex * self._io.size)
2094 2094 nodemaputil.setup_persistent_nodemap(tr, self)
2095 2095 self._chunkclear()
2096 2096
2097 2097 def _nodeduplicatecallback(self, transaction, node):
2098 2098 """called when trying to add a node already stored."""
2099 2099
2100 2100 def addrevision(
2101 2101 self,
2102 2102 text,
2103 2103 transaction,
2104 2104 link,
2105 2105 p1,
2106 2106 p2,
2107 2107 cachedelta=None,
2108 2108 node=None,
2109 2109 flags=REVIDX_DEFAULT_FLAGS,
2110 2110 deltacomputer=None,
2111 2111 sidedata=None,
2112 2112 ):
2113 2113 """add a revision to the log
2114 2114
2115 2115 text - the revision data to add
2116 2116 transaction - the transaction object used for rollback
2117 2117 link - the linkrev data to add
2118 2118 p1, p2 - the parent nodeids of the revision
2119 2119 cachedelta - an optional precomputed delta
2120 2120 node - nodeid of revision; typically node is not specified, and it is
2121 2121 computed by default as hash(text, p1, p2); however, subclasses might
2122 2122 use a different hashing method (and override checkhash() in that case)
2123 2123 flags - the known flags to set on the revision
2124 2124 deltacomputer - an optional deltacomputer instance shared between
2125 2125 multiple calls
2126 2126 """
2127 2127 if link == nullrev:
2128 2128 raise error.RevlogError(
2129 2129 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2130 2130 )
2131 2131
2132 2132 if sidedata is None:
2133 2133 sidedata = {}
2134 2134 elif not self.hassidedata:
2135 2135 raise error.ProgrammingError(
2136 2136 _(b"trying to add sidedata to a revlog who don't support them")
2137 2137 )
2138 2138
2139 2139 if flags:
2140 2140 node = node or self.hash(text, p1, p2)
2141 2141
2142 2142 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2143 2143
2144 2144 # If the flag processor modifies the revision data, ignore any provided
2145 2145 # cachedelta.
2146 2146 if rawtext != text:
2147 2147 cachedelta = None
2148 2148
2149 2149 if len(rawtext) > _maxentrysize:
2150 2150 raise error.RevlogError(
2151 2151 _(
2152 2152 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2153 2153 )
2154 2154 % (self.indexfile, len(rawtext))
2155 2155 )
2156 2156
2157 2157 node = node or self.hash(rawtext, p1, p2)
2158 2158 rev = self.index.get_rev(node)
2159 2159 if rev is not None:
2160 2160 return rev
2161 2161
2162 2162 if validatehash:
2163 2163 self.checkhash(rawtext, node, p1=p1, p2=p2)
2164 2164
2165 2165 return self.addrawrevision(
2166 2166 rawtext,
2167 2167 transaction,
2168 2168 link,
2169 2169 p1,
2170 2170 p2,
2171 2171 node,
2172 2172 flags,
2173 2173 cachedelta=cachedelta,
2174 2174 deltacomputer=deltacomputer,
2175 2175 sidedata=sidedata,
2176 2176 )
2177 2177
2178 2178 def addrawrevision(
2179 2179 self,
2180 2180 rawtext,
2181 2181 transaction,
2182 2182 link,
2183 2183 p1,
2184 2184 p2,
2185 2185 node,
2186 2186 flags,
2187 2187 cachedelta=None,
2188 2188 deltacomputer=None,
2189 2189 sidedata=None,
2190 2190 ):
2191 2191 """add a raw revision with known flags, node and parents
2192 2192 useful when reusing a revision not stored in this revlog (e.g. received
2193 2193 over wire, or read from an external bundle).
2194 2194 """
2195 2195 dfh = None
2196 2196 if not self._inline:
2197 2197 dfh = self._datafp(b"a+")
2198 2198 ifh = self._indexfp(b"a+")
2199 2199 try:
2200 2200 return self._addrevision(
2201 2201 node,
2202 2202 rawtext,
2203 2203 transaction,
2204 2204 link,
2205 2205 p1,
2206 2206 p2,
2207 2207 flags,
2208 2208 cachedelta,
2209 2209 ifh,
2210 2210 dfh,
2211 2211 deltacomputer=deltacomputer,
2212 2212 sidedata=sidedata,
2213 2213 )
2214 2214 finally:
2215 2215 if dfh:
2216 2216 dfh.close()
2217 2217 ifh.close()
2218 2218
2219 2219 def compress(self, data):
2220 2220 """Generate a possibly-compressed representation of data."""
2221 2221 if not data:
2222 2222 return b'', data
2223 2223
2224 2224 compressed = self._compressor.compress(data)
2225 2225
2226 2226 if compressed:
2227 2227 # The revlog compressor added the header in the returned data.
2228 2228 return b'', compressed
2229 2229
2230 2230 if data[0:1] == b'\0':
2231 2231 return b'', data
2232 2232 return b'u', data
2233 2233
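# ``compress()`` returns a (header, data) pair: ``b'u'`` marks text kept
# uncompressed, while an empty header means the compressor embedded its
# own header in ``data`` (zlib output starts with ``b'x'``, which is how
# ``decompress()`` below routes it); chunks whose first byte is ``\0``
# can be stored verbatim. For example:
#
#   >>> import zlib
#   >>> zlib.compress(b'some revision text')[0:1]
#   b'x'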
2234 2234 def decompress(self, data):
2235 2235 """Decompress a revlog chunk.
2236 2236
2237 2237 The chunk is expected to begin with a header identifying the
2238 2238 format type so it can be routed to an appropriate decompressor.
2239 2239 """
2240 2240 if not data:
2241 2241 return data
2242 2242
2243 2243 # Revlogs are read much more frequently than they are written and many
2244 2244 # chunks only take microseconds to decompress, so performance is
2245 2245 # important here.
2246 2246 #
2247 2247 # We can make a few assumptions about revlogs:
2248 2248 #
2249 2249 # 1) the majority of chunks will be compressed (as opposed to inline
2250 2250 # raw data).
2251 2251 # 2) decompressing *any* data will likely be at least 10x slower than
2252 2252 # returning raw inline data.
2253 2253 # 3) we want to prioritize common and officially supported compression
2254 2254 # engines
2255 2255 #
2256 2256 # It follows that we want to optimize for the "decompress compressed data
2257 2257 # when encoded with common and officially supported compression engines"
2258 2258 # case over "raw data" and "data encoded by less common or non-official
2259 2259 # compression engines." That is why we have the inline lookup first
2260 2260 # followed by the compengines lookup.
2261 2261 #
2262 2262 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2263 2263 # compressed chunks. And this matters for changelog and manifest reads.
2264 2264 t = data[0:1]
2265 2265
2266 2266 if t == b'x':
2267 2267 try:
2268 2268 return _zlibdecompress(data)
2269 2269 except zlib.error as e:
2270 2270 raise error.RevlogError(
2271 2271 _(b'revlog decompress error: %s')
2272 2272 % stringutil.forcebytestr(e)
2273 2273 )
2274 2274 # '\0' is more common than 'u' so it goes first.
2275 2275 elif t == b'\0':
2276 2276 return data
2277 2277 elif t == b'u':
2278 2278 return util.buffer(data, 1)
2279 2279
2280 2280 try:
2281 2281 compressor = self._decompressors[t]
2282 2282 except KeyError:
2283 2283 try:
2284 2284 engine = util.compengines.forrevlogheader(t)
2285 2285 compressor = engine.revlogcompressor(self._compengineopts)
2286 2286 self._decompressors[t] = compressor
2287 2287 except KeyError:
2288 2288 raise error.RevlogError(_(b'unknown compression type %r') % t)
2289 2289
2290 2290 return compressor.decompress(data)
2291 2291
2292 2292 def _addrevision(
2293 2293 self,
2294 2294 node,
2295 2295 rawtext,
2296 2296 transaction,
2297 2297 link,
2298 2298 p1,
2299 2299 p2,
2300 2300 flags,
2301 2301 cachedelta,
2302 2302 ifh,
2303 2303 dfh,
2304 2304 alwayscache=False,
2305 2305 deltacomputer=None,
2306 2306 sidedata=None,
2307 2307 ):
2308 2308 """internal function to add revisions to the log
2309 2309
2310 2310 see addrevision for argument descriptions.
2311 2311
2312 2312 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2313 2313
2314 2314 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2315 2315 be used.
2316 2316
2317 2317 invariants:
2318 2318 - rawtext is optional (can be None); if not set, cachedelta must be set.
2319 2319 if both are set, they must correspond to each other.
2320 2320 """
2321 2321 if node == nullid:
2322 2322 raise error.RevlogError(
2323 2323 _(b"%s: attempt to add null revision") % self.indexfile
2324 2324 )
2325 2325 if node == wdirid or node in wdirfilenodeids:
2326 2326 raise error.RevlogError(
2327 2327 _(b"%s: attempt to add wdir revision") % self.indexfile
2328 2328 )
2329 2329
2330 2330 if self._inline:
2331 2331 fh = ifh
2332 2332 else:
2333 2333 fh = dfh
2334 2334
2335 2335 btext = [rawtext]
2336 2336
2337 2337 curr = len(self)
2338 2338 prev = curr - 1
2339 2339
2340 2340 offset = self._get_data_offset(prev)
2341 2341
2342 2342 if self._concurrencychecker:
2343 2343 if self._inline:
2344 2344 # offset is "as if" it were in the .d file, so we need to add on
2345 2345 # the size of the entry metadata.
2346 2346 self._concurrencychecker(
2347 2347 ifh, self.indexfile, offset + curr * self._io.size
2348 2348 )
2349 2349 else:
2350 2350 # Entries in the .i are a consistent size.
2351 2351 self._concurrencychecker(
2352 2352 ifh, self.indexfile, curr * self._io.size
2353 2353 )
2354 2354 self._concurrencychecker(dfh, self.datafile, offset)
2355 2355
2356 2356 p1r, p2r = self.rev(p1), self.rev(p2)
2357 2357
2358 2358 # full versions are inserted when the needed deltas
2359 2359 # become comparable to the uncompressed text
2360 2360 if rawtext is None:
2361 2361 # need the rawtext size before it is changed by flag processors,
2362 2362 # which is the non-raw size. use revlog explicitly to avoid filelog's
2363 2363 # extra logic that might remove metadata size.
2364 2364 textlen = mdiff.patchedsize(
2365 2365 revlog.size(self, cachedelta[0]), cachedelta[1]
2366 2366 )
2367 2367 else:
2368 2368 textlen = len(rawtext)
2369 2369
2370 2370 if deltacomputer is None:
2371 2371 deltacomputer = deltautil.deltacomputer(self)
2372 2372
2373 2373 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2374 2374
2375 2375 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2376 2376
2377 2377 if sidedata:
2378 2378 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2379 2379 sidedata_offset = offset + deltainfo.deltalen
2380 2380 else:
2381 2381 serialized_sidedata = b""
2382 2382 # Don't store the offset if the sidedata is empty; that way
2383 2383 # we can easily detect empty sidedata, and it will be no different
2384 2384 # from sidedata we add manually.
2385 2385 sidedata_offset = 0
2386 2386
2387 2387 e = (
2388 2388 offset_type(offset, flags),
2389 2389 deltainfo.deltalen,
2390 2390 textlen,
2391 2391 deltainfo.base,
2392 2392 link,
2393 2393 p1r,
2394 2394 p2r,
2395 2395 node,
2396 2396 sidedata_offset,
2397 2397 len(serialized_sidedata),
2398 2398 )
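# Field layout of ``e``, mirroring the accessors near the top of this
# class: [0] packed offset+flags, [1] compressed delta length, [2] raw
# text length, [3] delta base rev, [4] linkrev, [5]/[6] parent revs,
# [7] node, [8]/[9] sidedata offset and length (REVLOGV2 only, hence
# the ``e[:8]`` truncation below).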
2399 2399
2400 2400 if self.version & 0xFFFF != REVLOGV2:
2401 2401 e = e[:8]
2402 2402
2403 2403 self.index.append(e)
2404 2404 entry = self._io.packentry(e, self.node, self.version, curr)
2405 2405 self._writeentry(
2406 2406 transaction,
2407 2407 ifh,
2408 2408 dfh,
2409 2409 entry,
2410 2410 deltainfo.data,
2411 2411 link,
2412 2412 offset,
2413 2413 serialized_sidedata,
2414 2414 )
2415 2415
2416 2416 rawtext = btext[0]
2417 2417
2418 2418 if alwayscache and rawtext is None:
2419 2419 rawtext = deltacomputer.buildtext(revinfo, fh)
2420 2420
2421 2421 if type(rawtext) == bytes: # only accept immutable objects
2422 2422 self._revisioncache = (node, curr, rawtext)
2423 2423 self._chainbasecache[curr] = deltainfo.chainbase
2424 2424 return curr
2425 2425
2426 2426 def _get_data_offset(self, prev):
2427 2427 """Returns the current offset in the (in-transaction) data file.
2428 2428 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
2429 2429 file to store that information: since sidedata can be rewritten to the
2430 2430 end of the data file within a transaction, you can have cases where, for
2431 2431 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2432 2432 to `n - 1`'s sidedata being written after `n`'s data.
2433 2433
2434 2434 TODO cache this in a docket file before getting out of experimental."""
2435 2435 if self.version & 0xFFFF != REVLOGV2:
2436 2436 return self.end(prev)
2437 2437
2438 2438 offset = 0
2439 2439 for rev, entry in enumerate(self.index):
2440 2440 sidedata_end = entry[8] + entry[9]
2441 2441 # Sidedata for a previous rev has potentially been written after
2442 2442 # this rev's end, so take the max.
2443 2443 offset = max(self.end(rev), offset, sidedata_end)
2444 2444 return offset
2445 2445
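# Concretely: if rev 5 carries no sidedata but rev 4's sidedata was
# rewritten later in the same transaction, rev 4's sidedata lands after
# rev 5's delta in the data file, so ``self.end(tiprev)`` alone would
# understate the current write position; the ``max()`` above catches
# that.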
2446 2446 def _writeentry(
2447 2447 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2448 2448 ):
2449 2449 # Files opened in a+ mode have inconsistent behavior on various
2450 2450 # platforms. Windows requires that a file positioning call be made
2451 2451 # when the file handle transitions between reads and writes. See
2452 2452 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2453 2453 # platforms, Python or the platform itself can be buggy. Some versions
2454 2454 # of Solaris have been observed to not append at the end of the file
2455 2455 # if the file was seeked to before the end. See issue4943 for more.
2456 2456 #
2457 2457 # We work around this issue by inserting a seek() before writing.
2458 2458 # Note: This is likely not necessary on Python 3. However, because
2459 2459 # the file handle is reused for reads and may be seeked there, we need
2460 2460 # to be careful before changing this.
2461 2461 ifh.seek(0, os.SEEK_END)
2462 2462 if dfh:
2463 2463 dfh.seek(0, os.SEEK_END)
2464 2464
2465 2465 curr = len(self) - 1
2466 2466 if not self._inline:
2467 2467 transaction.add(self.datafile, offset)
2468 2468 transaction.add(self.indexfile, curr * len(entry))
2469 2469 if data[0]:
2470 2470 dfh.write(data[0])
2471 2471 dfh.write(data[1])
2472 2472 if sidedata:
2473 2473 dfh.write(sidedata)
2474 2474 ifh.write(entry)
2475 2475 else:
2476 2476 offset += curr * self._io.size
2477 2477 transaction.add(self.indexfile, offset)
2478 2478 ifh.write(entry)
2479 2479 ifh.write(data[0])
2480 2480 ifh.write(data[1])
2481 2481 if sidedata:
2482 2482 ifh.write(sidedata)
2483 2483 self._enforceinlinesize(transaction, ifh)
2484 2484 nodemaputil.setup_persistent_nodemap(transaction, self)
2485 2485
2486 2486 def addgroup(
2487 2487 self,
2488 2488 deltas,
2489 2489 linkmapper,
2490 2490 transaction,
2491 2491 alwayscache=False,
2492 2492 addrevisioncb=None,
2493 2493 duplicaterevisioncb=None,
2494 2494 ):
2495 2495 """
2496 2496 add a delta group
2497 2497
2498 2498 given a set of deltas, add them to the revision log. the
2499 2499 first delta is against its parent, which should be in our
2500 2500 log, the rest are against the previous delta.
2501 2501
2502 2502 If ``addrevisioncb`` is defined, it will be called with arguments of
2503 2503 this revlog and the node that was added.
2504 2504 """
2505 2505
2506 2506 if self._writinghandles:
2507 2507 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2508 2508
2509 2509 r = len(self)
2510 2510 end = 0
2511 2511 if r:
2512 2512 end = self.end(r - 1)
2513 2513 ifh = self._indexfp(b"a+")
2514 2514 isize = r * self._io.size
2515 2515 if self._inline:
2516 2516 transaction.add(self.indexfile, end + isize)
2517 2517 dfh = None
2518 2518 else:
2519 2519 transaction.add(self.indexfile, isize)
2520 2520 transaction.add(self.datafile, end)
2521 2521 dfh = self._datafp(b"a+")
2522 2522
2523 2523 def flush():
2524 2524 if dfh:
2525 2525 dfh.flush()
2526 2526 ifh.flush()
2527 2527
2528 2528 self._writinghandles = (ifh, dfh)
2529 2529 empty = True
2530 2530
2531 2531 try:
2532 2532 deltacomputer = deltautil.deltacomputer(self)
2533 2533 # loop through our set of deltas
2534 2534 for data in deltas:
2535 2535 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2536 2536 link = linkmapper(linknode)
2537 2537 flags = flags or REVIDX_DEFAULT_FLAGS
2538 2538
2539 2539 rev = self.index.get_rev(node)
2540 2540 if rev is not None:
2541 2541 # this can happen if two branches make the same change
2542 2542 self._nodeduplicatecallback(transaction, rev)
2543 2543 if duplicaterevisioncb:
2544 2544 duplicaterevisioncb(self, rev)
2545 2545 empty = False
2546 2546 continue
2547 2547
2548 2548 for p in (p1, p2):
2549 2549 if not self.index.has_node(p):
2550 2550 raise error.LookupError(
2551 2551 p, self.indexfile, _(b'unknown parent')
2552 2552 )
2553 2553
2554 2554 if not self.index.has_node(deltabase):
2555 2555 raise error.LookupError(
2556 2556 deltabase, self.indexfile, _(b'unknown delta base')
2557 2557 )
2558 2558
2559 2559 baserev = self.rev(deltabase)
2560 2560
2561 2561 if baserev != nullrev and self.iscensored(baserev):
2562 2562 # if base is censored, delta must be full replacement in a
2563 2563 # single patch operation
2564 2564 hlen = struct.calcsize(b">lll")
2565 2565 oldlen = self.rawsize(baserev)
2566 2566 newlen = len(delta) - hlen
2567 2567 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2568 2568 raise error.CensoredBaseError(
2569 2569 self.indexfile, self.node(baserev)
2570 2570 )
2571 2571
2572 2572 if not flags and self._peek_iscensored(baserev, delta, flush):
2573 2573 flags |= REVIDX_ISCENSORED
2574 2574
2575 2575 # We assume consumers of addrevisioncb will want to retrieve
2576 2576 # the added revision, which will require a call to
2577 2577 # revision(). revision() will fast path if there is a cache
2578 2578 # hit. So, we tell _addrevision() to always cache in this case.
2579 2579 # We're only using addgroup() in the context of changegroup
2580 2580 # generation so the revision data can always be handled as raw
2581 2581 # by the flagprocessor.
2582 2582 rev = self._addrevision(
2583 2583 node,
2584 2584 None,
2585 2585 transaction,
2586 2586 link,
2587 2587 p1,
2588 2588 p2,
2589 2589 flags,
2590 2590 (baserev, delta),
2591 2591 ifh,
2592 2592 dfh,
2593 2593 alwayscache=alwayscache,
2594 2594 deltacomputer=deltacomputer,
2595 2595 sidedata=sidedata,
2596 2596 )
2597 2597
2598 2598 if addrevisioncb:
2599 2599 addrevisioncb(self, rev)
2600 2600 empty = False
2601 2601
2602 2602 if not dfh and not self._inline:
2603 2603 # addrevision switched from inline to conventional
2604 2604 # reopen the index
2605 2605 ifh.close()
2606 2606 dfh = self._datafp(b"a+")
2607 2607 ifh = self._indexfp(b"a+")
2608 2608 self._writinghandles = (ifh, dfh)
2609 2609 finally:
2610 2610 self._writinghandles = None
2611 2611
2612 2612 if dfh:
2613 2613 dfh.close()
2614 2614 ifh.close()
2615 2615 return not empty
2616 2616
2617 2617 def iscensored(self, rev):
2618 2618 """Check if a file revision is censored."""
2619 2619 if not self._censorable:
2620 2620 return False
2621 2621
2622 2622 return self.flags(rev) & REVIDX_ISCENSORED
2623 2623
2624 2624 def _peek_iscensored(self, baserev, delta, flush):
2625 2625 """Quickly check if a delta produces a censored revision."""
2626 2626 if not self._censorable:
2627 2627 return False
2628 2628
2629 2629 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2630 2630
2631 2631 def getstrippoint(self, minlink):
2632 2632 """find the minimum rev that must be stripped to strip the linkrev
2633 2633
2634 2634 Returns a tuple containing the minimum rev and a set of all revs that
2635 2635 have linkrevs that will be broken by this strip.
2636 2636 """
2637 2637 return storageutil.resolvestripinfo(
2638 2638 minlink,
2639 2639 len(self) - 1,
2640 2640 self.headrevs(),
2641 2641 self.linkrev,
2642 2642 self.parentrevs,
2643 2643 )
2644 2644
2645 2645 def strip(self, minlink, transaction):
2646 2646 """truncate the revlog on the first revision with a linkrev >= minlink
2647 2647
2648 2648 This function is called when we're stripping revision minlink and
2649 2649 its descendants from the repository.
2650 2650
2651 2651 We have to remove all revisions with linkrev >= minlink, because
2652 2652 the equivalent changelog revisions will be renumbered after the
2653 2653 strip.
2654 2654
2655 2655 So we truncate the revlog on the first of these revisions, and
2656 2656 trust that the caller has saved the revisions that shouldn't be
2657 2657 removed and that it'll re-add them after this truncation.
2658 2658 """
2659 2659 if len(self) == 0:
2660 2660 return
2661 2661
2662 2662 rev, _ = self.getstrippoint(minlink)
2663 2663 if rev == len(self):
2664 2664 return
2665 2665
2666 2666 # first truncate the files on disk
2667 2667 end = self.start(rev)
2668 2668 if not self._inline:
2669 2669 transaction.add(self.datafile, end)
2670 2670 end = rev * self._io.size
2671 2671 else:
2672 2672 end += rev * self._io.size
2673 2673
2674 2674 transaction.add(self.indexfile, end)
2675 2675
2676 2676 # then reset internal state in memory to forget those revisions
2677 2677 self._revisioncache = None
2678 2678 self._chaininfocache = util.lrucachedict(500)
2679 2679 self._chunkclear()
2680 2680
2681 2681 del self.index[rev:-1]
2682 2682
2683 2683 def checksize(self):
2684 2684 """Check size of index and data files
2685 2685
2686 2686 return a (dd, di) tuple.
2687 2687 - dd: extra bytes for the "data" file
2688 2688 - di: extra bytes for the "index" file
2689 2689
2690 2690 A healthy revlog will return (0, 0).
2691 2691 """
2692 2692 expected = 0
2693 2693 if len(self):
2694 2694 expected = max(0, self.end(len(self) - 1))
2695 2695
2696 2696 try:
2697 2697 with self._datafp() as f:
2698 2698 f.seek(0, io.SEEK_END)
2699 2699 actual = f.tell()
2700 2700 dd = actual - expected
2701 2701 except IOError as inst:
2702 2702 if inst.errno != errno.ENOENT:
2703 2703 raise
2704 2704 dd = 0
2705 2705
2706 2706 try:
2707 2707 f = self.opener(self.indexfile)
2708 2708 f.seek(0, io.SEEK_END)
2709 2709 actual = f.tell()
2710 2710 f.close()
2711 2711 s = self._io.size
2712 2712 i = max(0, actual // s)
2713 2713 di = actual - (i * s)
2714 2714 if self._inline:
2715 2715 databytes = 0
2716 2716 for r in self:
2717 2717 databytes += max(0, self.length(r))
2718 2718 dd = 0
2719 2719 di = actual - len(self) * s - databytes
2720 2720 except IOError as inst:
2721 2721 if inst.errno != errno.ENOENT:
2722 2722 raise
2723 2723 di = 0
2724 2724
2725 2725 return (dd, di)
2726 2726
2727 2727 def files(self):
2728 2728 res = [self.indexfile]
2729 2729 if not self._inline:
2730 2730 res.append(self.datafile)
2731 2731 return res
2732 2732
2733 2733 def emitrevisions(
2734 2734 self,
2735 2735 nodes,
2736 2736 nodesorder=None,
2737 2737 revisiondata=False,
2738 2738 assumehaveparentrevisions=False,
2739 2739 deltamode=repository.CG_DELTAMODE_STD,
2740 2740 sidedata_helpers=None,
2741 2741 ):
2742 2742 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2743 2743 raise error.ProgrammingError(
2744 2744 b'unhandled value for nodesorder: %s' % nodesorder
2745 2745 )
2746 2746
2747 2747 if nodesorder is None and not self._generaldelta:
2748 2748 nodesorder = b'storage'
2749 2749
2750 2750 if (
2751 2751 not self._storedeltachains
2752 2752 and deltamode != repository.CG_DELTAMODE_PREV
2753 2753 ):
2754 2754 deltamode = repository.CG_DELTAMODE_FULL
2755 2755
2756 2756 return storageutil.emitrevisions(
2757 2757 self,
2758 2758 nodes,
2759 2759 nodesorder,
2760 2760 revlogrevisiondelta,
2761 2761 deltaparentfn=self.deltaparent,
2762 2762 candeltafn=self.candelta,
2763 2763 rawsizefn=self.rawsize,
2764 2764 revdifffn=self.revdiff,
2765 2765 flagsfn=self.flags,
2766 2766 deltamode=deltamode,
2767 2767 revisiondata=revisiondata,
2768 2768 assumehaveparentrevisions=assumehaveparentrevisions,
2769 2769 sidedata_helpers=sidedata_helpers,
2770 2770 )
2771 2771
2772 2772 DELTAREUSEALWAYS = b'always'
2773 2773 DELTAREUSESAMEREVS = b'samerevs'
2774 2774 DELTAREUSENEVER = b'never'
2775 2775
2776 2776 DELTAREUSEFULLADD = b'fulladd'
2777 2777
2778 2778 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2779 2779
2780 2780 def clone(
2781 2781 self,
2782 2782 tr,
2783 2783 destrevlog,
2784 2784 addrevisioncb=None,
2785 2785 deltareuse=DELTAREUSESAMEREVS,
2786 2786 forcedeltabothparents=None,
2787 2787 sidedatacompanion=None,
2788 2788 ):
2789 2789 """Copy this revlog to another, possibly with format changes.
2790 2790
2791 2791 The destination revlog will contain the same revisions and nodes.
2792 2792 However, it may not be bit-for-bit identical due to e.g. delta encoding
2793 2793 differences.
2794 2794
2795 2795 The ``deltareuse`` argument controls how deltas from the existing revlog
2796 2796 are preserved in the destination revlog. The argument can have the
2797 2797 following values:
2798 2798
2799 2799 DELTAREUSEALWAYS
2800 2800 Deltas will always be reused (if possible), even if the destination
2801 2801 revlog would not select the same revisions for the delta. This is the
2802 2802 fastest mode of operation.
2803 2803 DELTAREUSESAMEREVS
2804 2804 Deltas will be reused if the destination revlog would pick the same
2805 2805 revisions for the delta. This mode strikes a balance between speed
2806 2806 and optimization.
2807 2807 DELTAREUSENEVER
2808 2808 Deltas will never be reused. This is the slowest mode of execution.
2809 2809 This mode can be used to recompute deltas (e.g. if the diff/delta
2810 2810 algorithm changes).
2811 2811 DELTAREUSEFULLADD
2812 2812 Revisions will be re-added as if they were new content. This is
2813 2813 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2814 2814 e.g. large file detection and handling.
2815 2815
2816 2816 Delta computation can be slow, so the choice of delta reuse policy can
2817 2817 significantly affect run time.
2818 2818
2819 2819 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2820 2820 two extremes. Deltas will be reused if they are appropriate. But if the
2821 2821 delta could choose a better revision, it will do so. This means if you
2822 2822 are converting a non-generaldelta revlog to a generaldelta revlog,
2823 2823 deltas will be recomputed if the delta's parent isn't a parent of the
2824 2824 revision.
2825 2825
2826 2826 In addition to the delta policy, the ``forcedeltabothparents``
2827 2827 argument controls whether deltas against both parents are forcibly
2828 2828 computed for merges. When unset, the destination's current setting is kept.
2829 2829
2830 2830 If not None, `sidedatacompanion` is a callable that accepts two
2831 2831 arguments:
2832 2832
2833 2833 (srcrevlog, rev)
2834 2834
2835 2835 and returns a quintet that controls changes to sidedata content from
2836 2836 the old revision to the new clone result (see the sketch after this method):
2837 2837
2838 2838 (dropall, filterout, update, new_flags, dropped_flags)
2839 2839
2840 2840 * if `dropall` is True, all sidedata should be dropped
2841 2841 * `filterout` is a set of sidedata keys that should be dropped
2842 2842 * `update` is a mapping of additional/new key -> value
2843 2843 * `new_flags` is a bitfield of new flags that the revision should get
2844 2844 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2845 2845 """
2846 2846 if deltareuse not in self.DELTAREUSEALL:
2847 2847 raise ValueError(
2848 2848 _(b'value for deltareuse invalid: %s') % deltareuse
2849 2849 )
2850 2850
2851 2851 if len(destrevlog):
2852 2852 raise ValueError(_(b'destination revlog is not empty'))
2853 2853
2854 2854 if getattr(self, 'filteredrevs', None):
2855 2855 raise ValueError(_(b'source revlog has filtered revisions'))
2856 2856 if getattr(destrevlog, 'filteredrevs', None):
2857 2857 raise ValueError(_(b'destination revlog has filtered revisions'))
2858 2858
2859 2859 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2860 2860 # if possible.
2861 2861 oldlazydelta = destrevlog._lazydelta
2862 2862 oldlazydeltabase = destrevlog._lazydeltabase
2863 2863 oldamd = destrevlog._deltabothparents
2864 2864
2865 2865 try:
2866 2866 if deltareuse == self.DELTAREUSEALWAYS:
2867 2867 destrevlog._lazydeltabase = True
2868 2868 destrevlog._lazydelta = True
2869 2869 elif deltareuse == self.DELTAREUSESAMEREVS:
2870 2870 destrevlog._lazydeltabase = False
2871 2871 destrevlog._lazydelta = True
2872 2872 elif deltareuse == self.DELTAREUSENEVER:
2873 2873 destrevlog._lazydeltabase = False
2874 2874 destrevlog._lazydelta = False
2875 2875
2876 2876 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2877 2877
2878 2878 self._clone(
2879 2879 tr,
2880 2880 destrevlog,
2881 2881 addrevisioncb,
2882 2882 deltareuse,
2883 2883 forcedeltabothparents,
2884 2884 sidedatacompanion,
2885 2885 )
2886 2886
2887 2887 finally:
2888 2888 destrevlog._lazydelta = oldlazydelta
2889 2889 destrevlog._lazydeltabase = oldlazydeltabase
2890 2890 destrevlog._deltabothparents = oldamd
2891 2891
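A minimal sketch of a `sidedatacompanion` callable satisfying the quintet contract documented in `clone` above; the sidedata key is hypothetical, purely for illustration:

    def example_companion(srcrevlog, rev):
        # Keep all sidedata except one hypothetical key; add nothing and
        # leave the revision flags untouched.
        dropall = False
        filterout = {b'hypothetical-obsolete-key'}  # made-up key name
        update = {}
        new_flags = 0
        dropped_flags = 0
        return (dropall, filterout, update, new_flags, dropped_flags)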
2892 2892 def _clone(
2893 2893 self,
2894 2894 tr,
2895 2895 destrevlog,
2896 2896 addrevisioncb,
2897 2897 deltareuse,
2898 2898 forcedeltabothparents,
2899 2899 sidedatacompanion,
2900 2900 ):
2901 2901 """perform the core duty of `revlog.clone` after parameter processing"""
2902 2902 deltacomputer = deltautil.deltacomputer(destrevlog)
2903 2903 index = self.index
2904 2904 for rev in self:
2905 2905 entry = index[rev]
2906 2906
2907 2907 # Some classes override linkrev to take filtered revs into
2908 2908 # account. Use raw entry from index.
2909 2909 flags = entry[0] & 0xFFFF
2910 2910 linkrev = entry[4]
2911 2911 p1 = index[entry[5]][7]
2912 2912 p2 = index[entry[6]][7]
2913 2913 node = entry[7]
2914 2914
2915 2915 sidedataactions = (False, [], {}, 0, 0)
2916 2916 if sidedatacompanion is not None:
2917 2917 sidedataactions = sidedatacompanion(self, rev)
2918 2918
2919 2919 # (Possibly) reuse the delta from the revlog if allowed and
2920 2920 # the revlog chunk is a delta.
2921 2921 cachedelta = None
2922 2922 rawtext = None
2923 2923 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2924 2924 dropall = sidedataactions[0]
2925 2925 filterout = sidedataactions[1]
2926 2926 update = sidedataactions[2]
2927 2927 new_flags = sidedataactions[3]
2928 2928 dropped_flags = sidedataactions[4]
2929 2929 text, sidedata = self._revisiondata(rev)
2930 2930 if dropall:
2931 2931 sidedata = {}
2932 2932 for key in filterout:
2933 2933 sidedata.pop(key, None)
2934 2934 sidedata.update(update)
2935 2935 if not sidedata:
2936 2936 sidedata = None
2937 2937
2938 2938 flags |= new_flags
2939 2939 flags &= ~dropped_flags
2940 2940
2941 2941 destrevlog.addrevision(
2942 2942 text,
2943 2943 tr,
2944 2944 linkrev,
2945 2945 p1,
2946 2946 p2,
2947 2947 cachedelta=cachedelta,
2948 2948 node=node,
2949 2949 flags=flags,
2950 2950 deltacomputer=deltacomputer,
2951 2951 sidedata=sidedata,
2952 2952 )
2953 2953 else:
2954 2954 if destrevlog._lazydelta:
2955 2955 dp = self.deltaparent(rev)
2956 2956 if dp != nullrev:
2957 2957 cachedelta = (dp, bytes(self._chunk(rev)))
2958 2958
2959 2959 if not cachedelta:
2960 2960 rawtext = self.rawdata(rev)
2961 2961
2962 2962 ifh = destrevlog.opener(
2963 2963 destrevlog.indexfile, b'a+', checkambig=False
2964 2964 )
2965 2965 dfh = None
2966 2966 if not destrevlog._inline:
2967 2967 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2968 2968 try:
2969 2969 destrevlog._addrevision(
2970 2970 node,
2971 2971 rawtext,
2972 2972 tr,
2973 2973 linkrev,
2974 2974 p1,
2975 2975 p2,
2976 2976 flags,
2977 2977 cachedelta,
2978 2978 ifh,
2979 2979 dfh,
2980 2980 deltacomputer=deltacomputer,
2981 2981 )
2982 2982 finally:
2983 2983 if dfh:
2984 2984 dfh.close()
2985 2985 ifh.close()
2986 2986
2987 2987 if addrevisioncb:
2988 2988 addrevisioncb(self, rev, node)
2989 2989
2990 2990 def censorrevision(self, tr, censornode, tombstone=b''):
2991 2991 if (self.version & 0xFFFF) == REVLOGV0:
2992 2992 raise error.RevlogError(
2993 2993 _(b'cannot censor with version %d revlogs') % self.version
2994 2994 )
2995 2995
2996 2996 censorrev = self.rev(censornode)
2997 2997 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2998 2998
2999 2999 if len(tombstone) > self.rawsize(censorrev):
3000 3000 raise error.Abort(
3001 3001 _(b'censor tombstone must be no longer than censored data')
3002 3002 )
3003 3003
3004 3004 # Rewriting the revlog in place is hard. Our strategy for censoring is
3005 3005 # to create a new revlog, copy all revisions to it, then replace the
3006 3006 # revlogs on transaction close.
3007 3007
3008 3008 newindexfile = self.indexfile + b'.tmpcensored'
3009 3009 newdatafile = self.datafile + b'.tmpcensored'
3010 3010
3011 3011 # This is a bit dangerous. We could easily have a mismatch of state.
3012 3012 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3013 3013 newrl.version = self.version
3014 3014 newrl._generaldelta = self._generaldelta
3015 3015 newrl._io = self._io
3016 3016
3017 3017 for rev in self.revs():
3018 3018 node = self.node(rev)
3019 3019 p1, p2 = self.parents(node)
3020 3020
3021 3021 if rev == censorrev:
3022 3022 newrl.addrawrevision(
3023 3023 tombstone,
3024 3024 tr,
3025 3025 self.linkrev(censorrev),
3026 3026 p1,
3027 3027 p2,
3028 3028 censornode,
3029 3029 REVIDX_ISCENSORED,
3030 3030 )
3031 3031
3032 3032 if newrl.deltaparent(rev) != nullrev:
3033 3033 raise error.Abort(
3034 3034 _(
3035 3035 b'censored revision stored as delta; '
3036 3036 b'cannot censor'
3037 3037 ),
3038 3038 hint=_(
3039 3039 b'censoring of revlogs is not '
3040 3040 b'fully implemented; please report '
3041 3041 b'this bug'
3042 3042 ),
3043 3043 )
3044 3044 continue
3045 3045
3046 3046 if self.iscensored(rev):
3047 3047 if self.deltaparent(rev) != nullrev:
3048 3048 raise error.Abort(
3049 3049 _(
3050 3050 b'cannot censor due to censored '
3051 3051 b'revision having delta stored'
3052 3052 )
3053 3053 )
3054 3054 rawtext = self._chunk(rev)
3055 3055 else:
3056 3056 rawtext = self.rawdata(rev)
3057 3057
3058 3058 newrl.addrawrevision(
3059 3059 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3060 3060 )
3061 3061
3062 3062 tr.addbackup(self.indexfile, location=b'store')
3063 3063 if not self._inline:
3064 3064 tr.addbackup(self.datafile, location=b'store')
3065 3065
3066 3066 self.opener.rename(newrl.indexfile, self.indexfile)
3067 3067 if not self._inline:
3068 3068 self.opener.rename(newrl.datafile, self.datafile)
3069 3069
3070 3070 self.clearcaches()
3071 3071 self._loadindex()
3072 3072
3073 3073 def verifyintegrity(self, state):
3074 3074 """Verifies the integrity of the revlog.
3075 3075
3076 3076 Yields ``revlogproblem`` instances describing problems that are
3077 3077 found.
3078 3078 """
3079 3079 dd, di = self.checksize()
3080 3080 if dd:
3081 3081 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3082 3082 if di:
3083 3083 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3084 3084
3085 3085 version = self.version & 0xFFFF
3086 3086
3087 3087 # The verifier tells us what version revlog we should be.
3088 3088 if version != state[b'expectedversion']:
3089 3089 yield revlogproblem(
3090 3090 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3091 3091 % (self.indexfile, version, state[b'expectedversion'])
3092 3092 )
3093 3093
3094 3094 state[b'skipread'] = set()
3095 3095 state[b'safe_renamed'] = set()
3096 3096
3097 3097 for rev in self:
3098 3098 node = self.node(rev)
3099 3099
3100 3100 # Verify contents. 4 cases to care about:
3101 3101 #
3102 3102 # common: the most common case
3103 3103 # rename: with a rename
3104 3104 # meta: file content starts with b'\1\n', the metadata
3105 3105 # header defined in filelog.py, but without a rename
3106 3106 # ext: content stored externally
3107 3107 #
3108 3108 # More formally, their differences are shown below:
3109 3109 #
3110 3110 # | common | rename | meta | ext
3111 3111 # -------------------------------------------------------
3112 3112 # flags() | 0 | 0 | 0 | not 0
3113 3113 # renamed() | False | True | False | ?
3114 3114 # rawtext[0:2]=='\1\n'| False | True | True | ?
3115 3115 #
3116 3116 # "rawtext" means the raw text stored in revlog data, which
3117 3117 # could be retrieved by "rawdata(rev)". "text"
3118 3118 # mentioned below is "revision(rev)".
3119 3119 #
3120 3120 # There are 3 different lengths stored physically:
3121 3121 # 1. L1: rawsize, stored in revlog index
3122 3122 # 2. L2: len(rawtext), stored in revlog data
3123 3123 # 3. L3: len(text), stored in revlog data if flags==0, or
3124 3124 # possibly somewhere else if flags!=0
3125 3125 #
3126 3126 # L1 should be equal to L2. L3 could be different from them.
3127 3127 # "text" may or may not affect commit hash depending on flag
3128 3128 # processors (see flagutil.addflagprocessor).
3129 3129 #
3130 3130 # | common | rename | meta | ext
3131 3131 # -------------------------------------------------
3132 3132 # rawsize() | L1 | L1 | L1 | L1
3133 3133 # size() | L1 | L2-LM | L1(*) | L1 (?)
3134 3134 # len(rawtext) | L2 | L2 | L2 | L2
3135 3135 # len(text) | L2 | L2 | L2 | L3
3136 3136 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3137 3137 #
3138 3138 # LM: length of metadata, depending on rawtext
3139 3139 # (*): not ideal, see comment in filelog.size
3140 3140 # (?): could be "- len(meta)" if the resolved content has
3141 3141 # rename metadata
3142 3142 #
3143 3143 # Checks needed to be done:
3144 3144 # 1. length check: L1 == L2, in all cases.
3145 3145 # 2. hash check: depending on flag processor, we may need to
3146 3146 # use either "text" (external), or "rawtext" (in revlog).
3147 3147
3148 3148 try:
3149 3149 skipflags = state.get(b'skipflags', 0)
3150 3150 if skipflags:
3151 3151 skipflags &= self.flags(rev)
3152 3152
3153 3153 _verify_revision(self, skipflags, state, node)
3154 3154
3155 3155 l1 = self.rawsize(rev)
3156 3156 l2 = len(self.rawdata(node))
3157 3157
3158 3158 if l1 != l2:
3159 3159 yield revlogproblem(
3160 3160 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3161 3161 node=node,
3162 3162 )
3163 3163
3164 3164 except error.CensoredNodeError:
3165 3165 if state[b'erroroncensored']:
3166 3166 yield revlogproblem(
3167 3167 error=_(b'censored file data'), node=node
3168 3168 )
3169 3169 state[b'skipread'].add(node)
3170 3170 except Exception as e:
3171 3171 yield revlogproblem(
3172 3172 error=_(b'unpacking %s: %s')
3173 3173 % (short(node), stringutil.forcebytestr(e)),
3174 3174 node=node,
3175 3175 )
3176 3176 state[b'skipread'].add(node)
3177 3177
3178 3178 def storageinfo(
3179 3179 self,
3180 3180 exclusivefiles=False,
3181 3181 sharedfiles=False,
3182 3182 revisionscount=False,
3183 3183 trackedsize=False,
3184 3184 storedsize=False,
3185 3185 ):
3186 3186 d = {}
3187 3187
3188 3188 if exclusivefiles:
3189 3189 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3190 3190 if not self._inline:
3191 3191 d[b'exclusivefiles'].append((self.opener, self.datafile))
3192 3192
3193 3193 if sharedfiles:
3194 3194 d[b'sharedfiles'] = []
3195 3195
3196 3196 if revisionscount:
3197 3197 d[b'revisionscount'] = len(self)
3198 3198
3199 3199 if trackedsize:
3200 3200 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3201 3201
3202 3202 if storedsize:
3203 3203 d[b'storedsize'] = sum(
3204 3204 self.opener.stat(path).st_size for path in self.files()
3205 3205 )
3206 3206
3207 3207 return d
3208
3209 def rewrite_sidedata(self, helpers, startrev, endrev):
3210 if self.version & 0xFFFF != REVLOGV2:
3211 return
3212 # inline revlogs are not yet supported because they suffer from an issue when
3213 # rewriting them (since it's not an append-only operation).
3214 # See issue6485.
3215 assert not self._inline
3216 if not helpers[1] and not helpers[2]:
3217 # Nothing to generate or remove
3218 return
3219
3220 new_entries = []
3221 # append the new sidedata
3222 with self._datafp(b'a+') as fp:
3223 # The append-mode seek issue may still exist here, see revlog._writeentry
3224 fp.seek(0, os.SEEK_END)
3225 current_offset = fp.tell()
3226 for rev in range(startrev, endrev + 1):
3227 entry = self.index[rev]
3228 new_sidedata = storageutil.run_sidedata_helpers(
3229 store=self,
3230 sidedata_helpers=helpers,
3231 sidedata={},
3232 rev=rev,
3233 )
3234
3235 serialized_sidedata = sidedatautil.serialize_sidedata(
3236 new_sidedata
3237 )
3238 if entry[8] != 0 or entry[9] != 0:
3239 # rewriting entries that already have sidedata is not
3240 # supported yet, because it introduces garbage data in the
3241 # revlog.
3242 msg = "Rewriting existing sidedata is not supported yet"
3243 raise error.Abort(msg)
3244 entry = entry[:8]
3245 entry += (current_offset, len(serialized_sidedata))
3246
3247 fp.write(serialized_sidedata)
3248 new_entries.append(entry)
3249 current_offset += len(serialized_sidedata)
3250
3251 # rewrite the new index entries
3252 with self._indexfp(b'w+') as fp:
3253 fp.seek(startrev * self._io.size)
3254 for i, entry in enumerate(new_entries):
3255 rev = startrev + i
3256 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3257 packed = self._io.packentry(entry, self.node, self.version, rev)
3258 fp.write(packed)
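For orientation, a hedged sketch of how a caller might drive `rewrite_sidedata`. The `helpers[1]`/`helpers[2]` checks above suggest `helpers` is a `(repo, computers, removers)` triple, but that shape is an assumption for illustration, and the driver function below is made up:

    # Hypothetical driver; assumes helpers == (repo, computers, removers),
    # as hinted by the helpers[1]/helpers[2] test in rewrite_sidedata.
    def rewrite_all_sidedata(repo, rl, computers, removers):
        helpers = (repo, computers, removers)
        if len(rl) == 0:
            return  # nothing to rewrite
        rl.rewrite_sidedata(helpers, 0, len(rl) - 1)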
@@ -1,499 +1,497 b''
1 1 #testcases extra sidedata
2 2
3 3 #if extra
4 4 $ cat >> $HGRCPATH << EOF
5 5 > [experimental]
6 6 > copies.write-to=changeset-only
7 7 > copies.read-from=changeset-only
8 8 > [alias]
9 9 > changesetcopies = log -r . -T 'files: {files}
10 10 > {extras % "{ifcontains("files", key, "{key}: {value}\n")}"}
11 11 > {extras % "{ifcontains("copies", key, "{key}: {value}\n")}"}'
12 12 > EOF
13 13 #endif
14 14
15 15 #if sidedata
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [format]
18 18 > exp-use-copies-side-data-changeset = yes
19 19 > EOF
20 20 #endif
21 21
22 22 $ cat >> $HGRCPATH << EOF
23 23 > [alias]
24 24 > showcopies = log -r . -T '{file_copies % "{source} -> {name}\n"}'
25 25 > [extensions]
26 26 > rebase =
27 27 > split =
28 28 > EOF
29 29
30 30 Check that copies are recorded correctly
31 31
32 32 $ hg init repo
33 33 $ cd repo
34 34 #if sidedata
35 35 $ hg debugformat -v
36 36 format-variant repo config default
37 37 fncache: yes yes yes
38 38 dotencode: yes yes yes
39 39 generaldelta: yes yes yes
40 40 share-safe: no no no
41 41 sparserevlog: yes yes yes
42 42 persistent-nodemap: no no no
43 43 copies-sdc: yes yes no
44 44 revlog-v2: yes yes no
45 45 plain-cl-delta: yes yes yes
46 46 compression: zlib zlib zlib
47 47 compression-level: default default default
48 48 #else
49 49 $ hg debugformat -v
50 50 format-variant repo config default
51 51 fncache: yes yes yes
52 52 dotencode: yes yes yes
53 53 generaldelta: yes yes yes
54 54 share-safe: no no no
55 55 sparserevlog: yes yes yes
56 56 persistent-nodemap: no no no
57 57 copies-sdc: no no no
58 58 revlog-v2: no no no
59 59 plain-cl-delta: yes yes yes
60 60 compression: zlib zlib zlib
61 61 compression-level: default default default
62 62 #endif
63 63 $ echo a > a
64 64 $ hg add a
65 65 $ hg ci -m initial
66 66 $ hg cp a b
67 67 $ hg cp a c
68 68 $ hg cp a d
69 69 $ hg ci -m 'copy a to b, c, and d'
70 70
71 71 #if extra
72 72
73 73 $ hg changesetcopies
74 74 files: b c d
75 75 filesadded: 0
76 76 1
77 77 2
78 78
79 79 p1copies: 0\x00a (esc)
80 80 1\x00a (esc)
81 81 2\x00a (esc)
82 82 #else
83 83 $ hg debugsidedata -c -v -- -1
84 84 1 sidedata entries
85 85 entry-0014 size 44
86 86 '\x00\x00\x00\x04\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00abcd'
87 87 #endif
88 88
89 89 $ hg showcopies
90 90 a -> b
91 91 a -> c
92 92 a -> d
93 93
94 94 #if extra
95 95
96 96 $ hg showcopies --config experimental.copies.read-from=compatibility
97 97 a -> b
98 98 a -> c
99 99 a -> d
100 100 $ hg showcopies --config experimental.copies.read-from=filelog-only
101 101
102 102 #endif
103 103
104 104 Check that renames are recorded correctly
105 105
106 106 $ hg mv b b2
107 107 $ hg ci -m 'rename b to b2'
108 108
109 109 #if extra
110 110
111 111 $ hg changesetcopies
112 112 files: b b2
113 113 filesadded: 1
114 114 filesremoved: 0
115 115
116 116 p1copies: 1\x00b (esc)
117 117
118 118 #else
119 119 $ hg debugsidedata -c -v -- -1
120 120 1 sidedata entries
121 121 entry-0014 size 25
122 122 '\x00\x00\x00\x02\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00bb2'
123 123 #endif
124 124
125 125 $ hg showcopies
126 126 b -> b2
127 127
128 128
129 129 Rename onto existing file. This should get recorded in the changeset files list and in the extras,
130 130 even though there is no filelog entry.
131 131
132 132 $ hg cp b2 c --force
133 133 $ hg st --copies
134 134 M c
135 135 b2
136 136
137 137 #if extra
138 138
139 139 $ hg debugindex c
140 140 rev linkrev nodeid p1 p2
141 141 0 1 b789fdd96dc2 000000000000 000000000000
142 142
143 143 #else
144 144
145 145 $ hg debugindex c
146 146 rev linkrev nodeid p1 p2
147 147 0 1 37d9b5d994ea 000000000000 000000000000
148 148
149 149 #endif
150 150
151 151
152 152 $ hg ci -m 'move b onto d'
153 153
154 154 #if extra
155 155
156 156 $ hg changesetcopies
157 157 files: c
158 158
159 159 p1copies: 0\x00b2 (esc)
160 160
161 161 #else
162 162 $ hg debugsidedata -c -v -- -1
163 163 1 sidedata entries
164 164 entry-0014 size 25
165 165 '\x00\x00\x00\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x00b2c'
166 166 #endif
167 167
168 168 $ hg showcopies
169 169 b2 -> c
170 170
171 171 #if extra
172 172
173 173 $ hg debugindex c
174 174 rev linkrev nodeid p1 p2
175 175 0 1 b789fdd96dc2 000000000000 000000000000
176 176
177 177 #else
178 178
179 179 $ hg debugindex c
180 180 rev linkrev nodeid p1 p2
181 181 0 1 37d9b5d994ea 000000000000 000000000000
182 182 1 3 029625640347 000000000000 000000000000
183 183
184 184 #endif
185 185
186 186 Create a merge commit with copying done during merge.
187 187
188 188 $ hg co 0
189 189 0 files updated, 0 files merged, 3 files removed, 0 files unresolved
190 190 $ hg cp a e
191 191 $ hg cp a f
192 192 $ hg ci -m 'copy a to e and f'
193 193 created new head
194 194 $ hg merge 3
195 195 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
196 196 (branch merge, don't forget to commit)
197 197 File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
198 198 always record it as being from p1
199 199 $ hg cp a g
200 200 File 'd' exists only in p2, so 'h' should be from p2
201 201 $ hg cp d h
202 202 File 'f' exists only in p1, so 'i' should be from p1
203 203 $ hg cp f i
204 204 $ hg ci -m 'merge'
205 205
206 206 #if extra
207 207
208 208 $ hg changesetcopies
209 209 files: g h i
210 210 filesadded: 0
211 211 1
212 212 2
213 213
214 214 p1copies: 0\x00a (esc)
215 215 2\x00f (esc)
216 216 p2copies: 1\x00d (esc)
217 217
218 218 #else
219 219 $ hg debugsidedata -c -v -- -1
220 220 1 sidedata entries
221 221 entry-0014 size 64
222 222 '\x00\x00\x00\x06\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00\x07\x00\x00\x00\x05\x00\x00\x00\x01\x06\x00\x00\x00\x06\x00\x00\x00\x02adfghi'
223 223 #endif
224 224
225 225 $ hg showcopies
226 226 a -> g
227 227 d -> h
228 228 f -> i
229 229
230 230 Test writing to both changeset and filelog
231 231
232 232 $ hg cp a j
233 233 #if extra
234 234 $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
235 235 $ hg changesetcopies
236 236 files: j
237 237 filesadded: 0
238 238 filesremoved:
239 239
240 240 p1copies: 0\x00a (esc)
241 241 p2copies:
242 242 #else
243 243 $ hg ci -m 'copy a to j'
244 244 $ hg debugsidedata -c -v -- -1
245 245 1 sidedata entries
246 246 entry-0014 size 24
247 247 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
248 248 #endif
249 249 $ hg debugdata j 0
250 250 \x01 (esc)
251 251 copy: a
252 252 copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
253 253 \x01 (esc)
254 254 a
255 255 $ hg showcopies
256 256 a -> j
257 257 $ hg showcopies --config experimental.copies.read-from=compatibility
258 258 a -> j
259 259 $ hg showcopies --config experimental.copies.read-from=filelog-only
260 260 a -> j
261 261 Existing copy information in the changeset gets removed on amend and writing
262 262 copy information on to the filelog
263 263 #if extra
264 264 $ hg ci --amend -m 'copy a to j, v2' \
265 265 > --config experimental.copies.write-to=filelog-only
266 266 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
267 267 $ hg changesetcopies
268 268 files: j
269 269
270 270 #else
271 271 $ hg ci --amend -m 'copy a to j, v2'
272 272 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
273 273 $ hg debugsidedata -c -v -- -1
274 1 sidedata entries (missing-correct-output !)
275 entry-0014 size 24 (missing-correct-output !)
276 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj' (missing-correct-output !)
274 1 sidedata entries
275 entry-0014 size 24
276 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
277 277 #endif
278 278 $ hg showcopies --config experimental.copies.read-from=filelog-only
279 a -> j (sidedata missing-correct-output !)
280 a -> j (no-sidedata !)
279 a -> j
281 280 The entries should be written to extras even if they're empty (so the client
282 281 won't have to fall back to reading from filelogs)
283 282 $ echo x >> j
284 283 #if extra
285 284 $ hg ci -m 'modify j' --config experimental.copies.write-to=compatibility
286 285 $ hg changesetcopies
287 286 files: j
288 287 filesadded:
289 288 filesremoved:
290 289
291 290 p1copies:
292 291 p2copies:
293 292 #else
294 293 $ hg ci -m 'modify j'
295 294 $ hg debugsidedata -c -v -- -1
296 295 1 sidedata entries
297 296 entry-0014 size 14
298 297 '\x00\x00\x00\x01\x14\x00\x00\x00\x01\x00\x00\x00\x00j'
299 298 #endif
300 299
301 300 Test writing only to filelog
302 301
303 302 $ hg cp a k
304 303 #if extra
305 304 $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only
306 305
307 306 $ hg changesetcopies
308 307 files: k
309 308
310 309 #else
311 310 $ hg ci -m 'copy a to k'
312 311 $ hg debugsidedata -c -v -- -1
313 312 1 sidedata entries
314 313 entry-0014 size 24
315 314 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00ak'
316 315 #endif
317 316
318 317 $ hg debugdata k 0
319 318 \x01 (esc)
320 319 copy: a
321 320 copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
322 321 \x01 (esc)
323 322 a
324 323 #if extra
325 324 $ hg showcopies
326 325
327 326 $ hg showcopies --config experimental.copies.read-from=compatibility
328 327 a -> k
329 328 $ hg showcopies --config experimental.copies.read-from=filelog-only
330 329 a -> k
331 330 #else
332 331 $ hg showcopies
333 332 a -> k
334 333 #endif
335 334
336 335 $ cd ..
337 336
338 337 Test rebasing a commit with copy information
339 338
340 339 $ hg init rebase-rename
341 340 $ cd rebase-rename
342 341 $ echo a > a
343 342 $ hg ci -Aqm 'add a'
344 343 $ echo a2 > a
345 344 $ hg ci -m 'modify a'
346 345 $ hg co -q 0
347 346 $ hg mv a b
348 347 $ hg ci -qm 'rename a to b'
349 348 Not only do we want this to run in-memory, it shouldn't fall back to
350 349 on-disk merge (no conflicts), so we force it to be in-memory
351 350 with no fallback.
352 351 $ hg rebase -d 1 --config rebase.experimental.inmemory=yes --config devel.rebase.force-in-memory-merge=yes
353 352 rebasing 2:* tip "rename a to b" (glob)
354 353 merging a and b to b
355 354 saved backup bundle to $TESTTMP/rebase-rename/.hg/strip-backup/*-*-rebase.hg (glob)
356 355 $ hg st --change . --copies
357 356 A b
358 a (sidedata missing-correct-output !)
359 a (no-sidedata !)
357 a
360 358 R a
361 359 $ cd ..
362 360
363 361 Test splitting a commit
364 362
365 363 $ hg init split
366 364 $ cd split
367 365 $ echo a > a
368 366 $ echo b > b
369 367 $ hg ci -Aqm 'add a and b'
370 368 $ echo a2 > a
371 369 $ hg mv b c
372 370 $ hg ci -m 'modify a, move b to c'
373 371 $ hg --config ui.interactive=yes split <<EOF
374 372 > y
375 373 > y
376 374 > n
377 375 > y
378 376 > EOF
379 377 diff --git a/a b/a
380 378 1 hunks, 1 lines changed
381 379 examine changes to 'a'?
382 380 (enter ? for help) [Ynesfdaq?] y
383 381
384 382 @@ -1,1 +1,1 @@
385 383 -a
386 384 +a2
387 385 record this change to 'a'?
388 386 (enter ? for help) [Ynesfdaq?] y
389 387
390 388 diff --git a/b b/c
391 389 rename from b
392 390 rename to c
393 391 examine changes to 'b' and 'c'?
394 392 (enter ? for help) [Ynesfdaq?] n
395 393
396 394 created new head
397 395 diff --git a/b b/c
398 396 rename from b
399 397 rename to c
400 398 examine changes to 'b' and 'c'?
401 399 (enter ? for help) [Ynesfdaq?] y
402 400
403 401 saved backup bundle to $TESTTMP/split/.hg/strip-backup/*-*-split.hg (glob)
404 402 $ cd ..
405 403
406 404 Test committing half a rename
407 405
408 406 $ hg init partial
409 407 $ cd partial
410 408 $ echo a > a
411 409 $ hg ci -Aqm 'add a'
412 410 $ hg mv a b
413 411 $ hg ci -m 'remove a' a
414 412
415 413 #if sidedata
416 414
417 415 Test upgrading/downgrading to sidedata storage
418 416 ==============================================
419 417
420 418 downgrading (keeping some sidedata)
421 419
422 420 $ hg debugformat -v
423 421 format-variant repo config default
424 422 fncache: yes yes yes
425 423 dotencode: yes yes yes
426 424 generaldelta: yes yes yes
427 425 share-safe: no no no
428 426 sparserevlog: yes yes yes
429 427 persistent-nodemap: no no no
430 428 copies-sdc: yes yes no
431 429 revlog-v2: yes yes no
432 430 plain-cl-delta: yes yes yes
433 431 compression: zlib zlib zlib
434 432 compression-level: default default default
435 433 $ hg debugsidedata -c -- 0
436 434 1 sidedata entries
437 435 entry-0014 size 14
438 436 $ hg debugsidedata -c -- 1
439 437 1 sidedata entries
440 438 entry-0014 size 14
441 439 $ hg debugsidedata -m -- 0
442 440 $ cat << EOF > .hg/hgrc
443 441 > [format]
444 442 > exp-use-side-data = yes
445 443 > exp-use-copies-side-data-changeset = no
446 444 > EOF
447 445 $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
448 446 $ hg debugformat -v
449 447 format-variant repo config default
450 448 fncache: yes yes yes
451 449 dotencode: yes yes yes
452 450 generaldelta: yes yes yes
453 451 share-safe: no no no
454 452 sparserevlog: yes yes yes
455 453 persistent-nodemap: no no no
456 454 copies-sdc: no no no
457 455 revlog-v2: yes yes no
458 456 plain-cl-delta: yes yes yes
459 457 compression: zlib zlib zlib
460 458 compression-level: default default default
461 459 $ hg debugsidedata -c -- 0
462 460 1 sidedata entries
463 461 entry-0014 size 14
464 462 $ hg debugsidedata -c -- 1
465 463 1 sidedata entries
466 464 entry-0014 size 14
467 465 $ hg debugsidedata -m -- 0
468 466
469 467 upgrading
470 468
471 469 $ cat << EOF > .hg/hgrc
472 470 > [format]
473 471 > exp-use-copies-side-data-changeset = yes
474 472 > EOF
475 473 $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
476 474 $ hg debugformat -v
477 475 format-variant repo config default
478 476 fncache: yes yes yes
479 477 dotencode: yes yes yes
480 478 generaldelta: yes yes yes
481 479 share-safe: no no no
482 480 sparserevlog: yes yes yes
483 481 persistent-nodemap: no no no
484 482 copies-sdc: yes yes no
485 483 revlog-v2: yes yes no
486 484 plain-cl-delta: yes yes yes
487 485 compression: zlib zlib zlib
488 486 compression-level: default default default
489 487 $ hg debugsidedata -c -- 0
490 488 1 sidedata entries
491 489 entry-0014 size 14
492 490 $ hg debugsidedata -c -- 1
493 491 1 sidedata entries
494 492 entry-0014 size 14
495 493 $ hg debugsidedata -m -- 0
496 494
497 495 #endif
498 496
499 497 $ cd ..
@@ -1,88 +1,50 b''
1 # ext-sidedata.py - small extension to test the sidedata logic
1 # coding: utf8
2 # ext-sidedata-2.py - small extension to test (differently) the sidedata logic
2 3 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
4 # Simulates a client for a complex sidedata exchange.
5 #
6 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
4 7 #
5 8 # This software may be used and distributed according to the terms of the
6 9 # GNU General Public License version 2 or any later version.
7 10
8 11 from __future__ import absolute_import
9 12
10 13 import hashlib
11 14 import struct
12 15
13 from mercurial.node import (
14 nullid,
15 nullrev,
16 )
17 from mercurial import (
18 extensions,
19 requirements,
20 revlog,
21 )
22
23 from mercurial.upgrade_utils import engine as upgrade_engine
24
25 from mercurial.revlogutils import sidedata
16 from mercurial.revlogutils import sidedata as sidedatamod
26 17
27 18
28 def wrapaddrevision(
29 orig, self, text, transaction, link, p1, p2, *args, **kwargs
30 ):
31 if kwargs.get('sidedata') is None:
32 kwargs['sidedata'] = {}
33 sd = kwargs['sidedata']
34 ## let's store some arbitrary data just for testing
35 # text length
36 sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
37 # and sha2 hashes
38 sha256 = hashlib.sha256(text).digest()
39 sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
40 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
19 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
20 sidedata = sidedata.copy()
21 if text is None:
22 text = revlog.revision(rev)
23 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
24 return sidedata
41 25
42 26
43 def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
44 text, sd = orig(self, nodeorrev, *args, **kwargs)
45 if getattr(self, 'sidedatanocheck', False):
46 return text, sd
47 if self.version & 0xFFFF != 2:
48 return text, sd
49 if nodeorrev != nullrev and nodeorrev != nullid:
50 if len(text) != struct.unpack('>I', sd[sidedata.SD_TEST1])[0]:
51 raise RuntimeError('text size mismatch')
52 expected = sd[sidedata.SD_TEST2]
53 got = hashlib.sha256(text).digest()
54 if got != expected:
55 raise RuntimeError('sha256 mismatch')
56 return text, sd
27 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
28 sidedata = sidedata.copy()
29 if text is None:
30 text = revlog.revision(rev)
31 sha256 = hashlib.sha256(text).digest()
32 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
33 return sidedata
57 34
58 35
59 def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
60 sidedatacompanion = orig(srcrepo, dstrepo)
61 addedreqs = dstrepo.requirements - srcrepo.requirements
62 if requirements.SIDEDATA_REQUIREMENT in addedreqs:
63 assert sidedatacompanion is None # deal with composition later
64
65 def sidedatacompanion(revlog, rev):
66 update = {}
67 revlog.sidedatanocheck = True
68 try:
69 text = revlog.revision(rev)
70 finally:
71 del revlog.sidedatanocheck
72 ## let's store some arbitrary data just for testing
73 # text length
74 update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
75 # and sha2 hashes
76 sha256 = hashlib.sha256(text).digest()
77 update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
78 return False, (), update, 0, 0
79
80 return sidedatacompanion
81
82
83 def extsetup(ui):
84 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
85 extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
86 extensions.wrapfunction(
87 upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
36 def reposetup(ui, repo):
37 # Sidedata keys happen to be the same as the categories, easier for testing.
38 for kind in (b'changelog', b'manifest', b'filelog'):
39 repo.register_sidedata_computer(
40 kind,
41 sidedatamod.SD_TEST1,
42 (sidedatamod.SD_TEST1,),
43 compute_sidedata_1,
88 44 )
45 repo.register_sidedata_computer(
46 kind,
47 sidedatamod.SD_TEST2,
48 (sidedatamod.SD_TEST2,),
49 compute_sidedata_2,
50 )
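As a quick sanity check on `compute_sidedata_1` above: it packs the revision text length with `'>I'`, i.e. as a 4-byte big-endian unsigned integer. A standalone sketch of that encoding (plain Python, nothing Mercurial-specific):

    import struct

    text = b'hello\n'
    packed = struct.pack('>I', len(text))  # what compute_sidedata_1 stores
    assert packed == b'\x00\x00\x00\x06'
    assert struct.unpack('>I', packed)[0] == len(text)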
@@ -1,88 +1,88 b''
1 # ext-sidedata.py - small extension to test the sidedata logic
1 # coding: utf8
2 # ext-sidedata-3.py - small extension to test (differently still) the sidedata
3 # logic
2 4 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
5 # Simulates a client for a complex sidedata exchange.
6 #
7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
4 8 #
5 9 # This software may be used and distributed according to the terms of the
6 10 # GNU General Public License version 2 or any later version.
7 11
8 12 from __future__ import absolute_import
9 13
10 14 import hashlib
11 15 import struct
12 16
13 from mercurial.node import (
14 nullid,
15 nullrev,
16 )
17 17 from mercurial import (
18 18 extensions,
19 requirements,
20 19 revlog,
21 20 )
22 21
23 from mercurial.upgrade_utils import engine as upgrade_engine
22 from mercurial.revlogutils import sidedata as sidedatamod
23
24
25 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
26 sidedata = sidedata.copy()
27 if text is None:
28 text = revlog.revision(rev)
29 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
30 return sidedata
31
24 32
25 from mercurial.revlogutils import sidedata
33 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
34 sidedata = sidedata.copy()
35 if text is None:
36 text = revlog.revision(rev)
37 sha256 = hashlib.sha256(text).digest()
38 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
39 return sidedata
40
41
42 def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
43 sidedata = sidedata.copy()
44 if text is None:
45 text = revlog.revision(rev)
46 sha384 = hashlib.sha384(text).digest()
47 sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
48 return sidedata
26 49
27 50
28 51 def wrapaddrevision(
29 52 orig, self, text, transaction, link, p1, p2, *args, **kwargs
30 53 ):
31 54 if kwargs.get('sidedata') is None:
32 55 kwargs['sidedata'] = {}
33 56 sd = kwargs['sidedata']
34 ## let's store some arbitrary data just for testing
35 # text length
36 sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
37 # and sha2 hashes
38 sha256 = hashlib.sha256(text).digest()
39 sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
57 sd = compute_sidedata_1(None, self, None, sd, text=text)
58 kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
40 59 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
41 60
42 61
43 def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
44 text, sd = orig(self, nodeorrev, *args, **kwargs)
45 if getattr(self, 'sidedatanocheck', False):
46 return text, sd
47 if self.version & 0xFFFF != 2:
48 return text, sd
49 if nodeorrev != nullrev and nodeorrev != nullid:
50 if len(text) != struct.unpack('>I', sd[sidedata.SD_TEST1])[0]:
51 raise RuntimeError('text size mismatch')
52 expected = sd[sidedata.SD_TEST2]
53 got = hashlib.sha256(text).digest()
54 if got != expected:
55 raise RuntimeError('sha256 mismatch')
56 return text, sd
57
58
59 def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
60 sidedatacompanion = orig(srcrepo, dstrepo)
61 addedreqs = dstrepo.requirements - srcrepo.requirements
62 if requirements.SIDEDATA_REQUIREMENT in addedreqs:
63 assert sidedatacompanion is None # deal with composition later
64
65 def sidedatacompanion(revlog, rev):
66 update = {}
67 revlog.sidedatanocheck = True
68 try:
69 text = revlog.revision(rev)
70 finally:
71 del revlog.sidedatanocheck
72 ## let's store some arbitrary data just for testing
73 # text length
74 update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
75 # and sha2 hashes
76 sha256 = hashlib.sha256(text).digest()
77 update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
78 return False, (), update, 0, 0
79
80 return sidedatacompanion
81
82
83 62 def extsetup(ui):
84 63 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
85 extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
86 extensions.wrapfunction(
87 upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
64
65
66 def reposetup(ui, repo):
67 # Sidedata keys happen to be the same as the categories, easier for testing.
68 for kind in (b'changelog', b'manifest', b'filelog'):
69 repo.register_sidedata_computer(
70 kind,
71 sidedatamod.SD_TEST1,
72 (sidedatamod.SD_TEST1,),
73 compute_sidedata_1,
88 74 )
75 repo.register_sidedata_computer(
76 kind,
77 sidedatamod.SD_TEST2,
78 (sidedatamod.SD_TEST2,),
79 compute_sidedata_2,
80 )
81 repo.register_sidedata_computer(
82 kind,
83 sidedatamod.SD_TEST3,
84 (sidedatamod.SD_TEST3,),
85 compute_sidedata_3,
86 )
87 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
88 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
@@ -1,88 +1,19 b''
1 # ext-sidedata.py - small extension to test the sidedata logic
1 # coding: utf8
2 # ext-sidedata-4.py - small extension to test (differently still) the sidedata
3 # logic
2 4 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
5 # Simulates a server for a complex sidedata exchange.
6 #
7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
4 8 #
5 9 # This software may be used and distributed according to the terms of the
6 10 # GNU General Public License version 2 or any later version.
7 11
8 12 from __future__ import absolute_import
9 13
10 import hashlib
11 import struct
12
13 from mercurial.node import (
14 nullid,
15 nullrev,
16 )
17 from mercurial import (
18 extensions,
19 requirements,
20 revlog,
21 )
22
23 from mercurial.upgrade_utils import engine as upgrade_engine
24
25 14 from mercurial.revlogutils import sidedata
26 15
27 16
28 def wrapaddrevision(
29 orig, self, text, transaction, link, p1, p2, *args, **kwargs
30 ):
31 if kwargs.get('sidedata') is None:
32 kwargs['sidedata'] = {}
33 sd = kwargs['sidedata']
34 ## let's store some arbitrary data just for testing
35 # text length
36 sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
37 # and sha2 hashes
38 sha256 = hashlib.sha256(text).digest()
39 sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
40 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
41
42
43 def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
44 text, sd = orig(self, nodeorrev, *args, **kwargs)
45 if getattr(self, 'sidedatanocheck', False):
46 return text, sd
47 if self.version & 0xFFFF != 2:
48 return text, sd
49 if nodeorrev != nullrev and nodeorrev != nullid:
50 if len(text) != struct.unpack('>I', sd[sidedata.SD_TEST1])[0]:
51 raise RuntimeError('text size mismatch')
52 expected = sd[sidedata.SD_TEST2]
53 got = hashlib.sha256(text).digest()
54 if got != expected:
55 raise RuntimeError('sha256 mismatch')
56 return text, sd
57
58
59 def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
60 sidedatacompanion = orig(srcrepo, dstrepo)
61 addedreqs = dstrepo.requirements - srcrepo.requirements
62 if requirements.SIDEDATA_REQUIREMENT in addedreqs:
63 assert sidedatacompanion is None # deal with composition later
64
65 def sidedatacompanion(revlog, rev):
66 update = {}
67 revlog.sidedatanocheck = True
68 try:
69 text = revlog.revision(rev)
70 finally:
71 del revlog.sidedatanocheck
72 ## let's store some arbitrary data just for testing
73 # text length
74 update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
75 # and sha2 hashes
76 sha256 = hashlib.sha256(text).digest()
77 update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
78 return False, (), update, 0, 0
79
80 return sidedatacompanion
81
82
83 def extsetup(ui):
84 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
85 extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
86 extensions.wrapfunction(
87 upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
88 )
17 def reposetup(ui, repo):
18 repo.register_wanted_sidedata(sidedata.SD_TEST2)
19 repo.register_wanted_sidedata(sidedata.SD_TEST3)
@@ -1,88 +1,96 b''
1 1 # ext-sidedata.py - small extension to test the sidedata logic
2 2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import struct
12 12
13 13 from mercurial.node import (
14 14 nullid,
15 15 nullrev,
16 16 )
17 17 from mercurial import (
18 18 extensions,
19 19 requirements,
20 20 revlog,
21 21 )
22 22
23 23 from mercurial.upgrade_utils import engine as upgrade_engine
24 24
25 25 from mercurial.revlogutils import sidedata
26 26
27 27
28 28 def wrapaddrevision(
29 29 orig, self, text, transaction, link, p1, p2, *args, **kwargs
30 30 ):
31 31 if kwargs.get('sidedata') is None:
32 32 kwargs['sidedata'] = {}
33 33 sd = kwargs['sidedata']
34 34 ## let's store some arbitrary data just for testing
35 35 # text length
36 36 sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
37 37 # and sha2 hashes
38 38 sha256 = hashlib.sha256(text).digest()
39 39 sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
40 40 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
41 41
42 42
43 43 def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
44 44 text, sd = orig(self, nodeorrev, *args, **kwargs)
45 45 if getattr(self, 'sidedatanocheck', False):
46 46 return text, sd
47 47 if self.version & 0xFFFF != 2:
48 48 return text, sd
49 49 if nodeorrev != nullrev and nodeorrev != nullid:
50 if len(text) != struct.unpack('>I', sd[sidedata.SD_TEST1])[0]:
50 cat1 = sd.get(sidedata.SD_TEST1)
51 if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
51 52 raise RuntimeError('text size mismatch')
52 expected = sd[sidedata.SD_TEST2]
53 expected = sd.get(sidedata.SD_TEST2)
53 54 got = hashlib.sha256(text).digest()
54 if got != expected:
55 if expected is not None and got != expected:
55 56 raise RuntimeError('sha256 mismatch')
56 57 return text, sd
57 58
58 59
59 60 def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
60 61 sidedatacompanion = orig(srcrepo, dstrepo)
61 62 addedreqs = dstrepo.requirements - srcrepo.requirements
62 63 if requirements.SIDEDATA_REQUIREMENT in addedreqs:
63 64 assert sidedatacompanion is None # deal with composition later
64 65
65 66 def sidedatacompanion(revlog, rev):
66 67 update = {}
67 68 revlog.sidedatanocheck = True
68 69 try:
69 70 text = revlog.revision(rev)
70 71 finally:
71 72 del revlog.sidedatanocheck
72 73 ## let's store some arbitrary data just for testing
73 74 # text length
74 75 update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
75 76 # and sha2 hashes
76 77 sha256 = hashlib.sha256(text).digest()
77 78 update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
78 79 return False, (), update, 0, 0
79 80
80 81 return sidedatacompanion
81 82
82 83
83 84 def extsetup(ui):
84 85 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
85 86 extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
86 87 extensions.wrapfunction(
87 88 upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
88 89 )
90
91
92 def reposetup(ui, repo):
93 # We don't register sidedata computers because we don't care within these
94 # tests
95 repo.register_wanted_sidedata(sidedata.SD_TEST1)
96 repo.register_wanted_sidedata(sidedata.SD_TEST2)