sidedata-exchange: rewrite sidedata on-the-fly whenever possible...
Raphaël Gomès
r47452:ba8e508a default
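In short: when two peers disagree on which sidedata categories they support, this change lets the side that writes the revisions rewrite their sidedata on the fly. Categories the destination wants but that are missing get computed with the registered sidedata computers, and categories the destination does not support get dropped; the exchange only aborts when the side that must generate a category has no computer for it (see the push test below). A minimal sketch of the category arithmetic, with made-up names rather than Mercurial's API:

    def plan_sidedata_rewrite(wanted, incoming):
        """Decide what to do with each sidedata category of a revision."""
        to_generate = wanted - incoming   # compute on the fly
        to_remove = incoming - wanted     # strip while applying
        return to_generate, to_remove

    # In the "not subset/superset" push test below, the target wants {2, 3}
    # and incoming revisions carry {1, 2}: entry-0003 is generated,
    # entry-0002 is kept, and entry-0001 is removed.
    print(plan_sidedata_rewrite({2, 3}, {1, 2}))  # ({3}, {1})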
@@ -0,0 +1,473 @@
1 ===========================
2 Tests for sidedata exchange
3 ===========================
4
5 Check simple exchange behavior
6 ==============================
7
8 Pusher and pushed have sidedata enabled
9 ---------------------------------------
10
11 $ hg init sidedata-source --config format.exp-use-side-data=yes
12 $ cat << EOF >> sidedata-source/.hg/hgrc
13 > [extensions]
14 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
15 > EOF
16 $ hg init sidedata-target --config format.exp-use-side-data=yes
17 $ cat << EOF >> sidedata-target/.hg/hgrc
18 > [extensions]
19 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
20 > EOF
21 $ cd sidedata-source
22 $ echo a > a
23 $ echo b > b
24 $ echo c > c
25 $ hg commit -Am "initial"
26 adding a
27 adding b
28 adding c
29 $ echo aa > a
30 $ hg commit -m "other"
31 $ hg push -r . ../sidedata-target
32 pushing to ../sidedata-target
33 searching for changes
34 adding changesets
35 adding manifests
36 adding file changes
37 added 2 changesets with 4 changes to 3 files
38 $ hg -R ../sidedata-target debugsidedata -c 0
39 2 sidedata entries
40 entry-0001 size 4
41 entry-0002 size 32
42 $ hg -R ../sidedata-target debugsidedata -c 1 -v
43 2 sidedata entries
44 entry-0001 size 4
45 '\x00\x00\x00:'
46 entry-0002 size 32
47 '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
48 $ hg -R ../sidedata-target debugsidedata -m 0
49 2 sidedata entries
50 entry-0001 size 4
51 entry-0002 size 32
52 $ hg -R ../sidedata-target debugsidedata -m 1 -v
53 2 sidedata entries
54 entry-0001 size 4
55 '\x00\x00\x00\x81'
56 entry-0002 size 32
57 '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
58 $ hg -R ../sidedata-target debugsidedata a 0
59 2 sidedata entries
60 entry-0001 size 4
61 entry-0002 size 32
62 $ hg -R ../sidedata-target debugsidedata a 1 -v
63 2 sidedata entries
64 entry-0001 size 4
65 '\x00\x00\x00\x03'
66 entry-0002 size 32
67 '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
68 $ cd ..
69
70 Puller and pulled have sidedata enabled
71 ---------------------------------------
72
73 $ rm -rf sidedata-source sidedata-target
74 $ hg init sidedata-source --config format.exp-use-side-data=yes
75 $ cat << EOF >> sidedata-source/.hg/hgrc
76 > [extensions]
77 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
78 > EOF
79 $ hg init sidedata-target --config format.exp-use-side-data=yes
80 $ cat << EOF >> sidedata-target/.hg/hgrc
81 > [extensions]
82 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
83 > EOF
84 $ cd sidedata-source
85 $ echo a > a
86 $ echo b > b
87 $ echo c > c
88 $ hg commit -Am "initial"
89 adding a
90 adding b
91 adding c
92 $ echo aa > a
93 $ hg commit -m "other"
94 $ hg pull -R ../sidedata-target ../sidedata-source
95 pulling from ../sidedata-source
96 requesting all changes
97 adding changesets
98 adding manifests
99 adding file changes
100 added 2 changesets with 4 changes to 3 files
101 new changesets 05da661850d7:7ec8b4049447
102 (run 'hg update' to get a working copy)
103 $ hg -R ../sidedata-target debugsidedata -c 0
104 2 sidedata entries
105 entry-0001 size 4
106 entry-0002 size 32
107 $ hg -R ../sidedata-target debugsidedata -c 1 -v
108 2 sidedata entries
109 entry-0001 size 4
110 '\x00\x00\x00:'
111 entry-0002 size 32
112 '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
113 $ hg -R ../sidedata-target debugsidedata -m 0
114 2 sidedata entries
115 entry-0001 size 4
116 entry-0002 size 32
117 $ hg -R ../sidedata-target debugsidedata -m 1 -v
118 2 sidedata entries
119 entry-0001 size 4
120 '\x00\x00\x00\x81'
121 entry-0002 size 32
122 '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
123 $ hg -R ../sidedata-target debugsidedata a 0
124 2 sidedata entries
125 entry-0001 size 4
126 entry-0002 size 32
127 $ hg -R ../sidedata-target debugsidedata a 1 -v
128 2 sidedata entries
129 entry-0001 size 4
130 '\x00\x00\x00\x03'
131 entry-0002 size 32
132 '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
133 $ cd ..
134
135 Now on to asymmetric configs.
136
137 Pusher has sidedata enabled, pushed does not
138 --------------------------------------------
139
140 $ rm -rf sidedata-source sidedata-target
141 $ hg init sidedata-source --config format.exp-use-side-data=yes
142 $ cat << EOF >> sidedata-source/.hg/hgrc
143 > [extensions]
144 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
145 > EOF
146 $ hg init sidedata-target --config format.exp-use-side-data=no
147 $ cd sidedata-source
148 $ echo a > a
149 $ echo b > b
150 $ echo c > c
151 $ hg commit -Am "initial"
152 adding a
153 adding b
154 adding c
155 $ echo aa > a
156 $ hg commit -m "other"
157 $ hg push -r . ../sidedata-target --traceback
158 pushing to ../sidedata-target
159 searching for changes
160 adding changesets
161 adding manifests
162 adding file changes
163 added 2 changesets with 4 changes to 3 files
164 $ hg -R ../sidedata-target log -G
165 o changeset: 1:7ec8b4049447
166 | tag: tip
167 | user: test
168 | date: Thu Jan 01 00:00:00 1970 +0000
169 | summary: other
170 |
171 o changeset: 0:05da661850d7
172 user: test
173 date: Thu Jan 01 00:00:00 1970 +0000
174 summary: initial
175
176
177 $ hg -R ../sidedata-target debugsidedata -c 0
178 $ hg -R ../sidedata-target debugsidedata -c 1 -v
179 $ hg -R ../sidedata-target debugsidedata -m 0
180 $ hg -R ../sidedata-target debugsidedata -m 1 -v
181 $ hg -R ../sidedata-target debugsidedata a 0
182 $ hg -R ../sidedata-target debugsidedata a 1 -v
183 $ cd ..
184
185 Pulled has sidedata enabled, puller does not
186 --------------------------------------------
187
188 $ rm -rf sidedata-source sidedata-target
189 $ hg init sidedata-source --config format.exp-use-side-data=yes
190 $ cat << EOF >> sidedata-source/.hg/hgrc
191 > [extensions]
192 > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
193 > EOF
194 $ hg init sidedata-target --config format.exp-use-side-data=no
195 $ cd sidedata-source
196 $ echo a > a
197 $ echo b > b
198 $ echo c > c
199 $ hg commit -Am "initial"
200 adding a
201 adding b
202 adding c
203 $ echo aa > a
204 $ hg commit -m "other"
205 $ hg pull -R ../sidedata-target ../sidedata-source
206 pulling from ../sidedata-source
207 requesting all changes
208 adding changesets
209 adding manifests
210 adding file changes
211 added 2 changesets with 4 changes to 3 files
212 new changesets 05da661850d7:7ec8b4049447
213 (run 'hg update' to get a working copy)
214 $ hg -R ../sidedata-target log -G
215 o changeset: 1:7ec8b4049447
216 | tag: tip
217 | user: test
218 | date: Thu Jan 01 00:00:00 1970 +0000
219 | summary: other
220 |
221 o changeset: 0:05da661850d7
222 user: test
223 date: Thu Jan 01 00:00:00 1970 +0000
224 summary: initial
225
226
227 $ hg -R ../sidedata-target debugsidedata -c 0
228 $ hg -R ../sidedata-target debugsidedata -c 1 -v
229 $ hg -R ../sidedata-target debugsidedata -m 0
230 $ hg -R ../sidedata-target debugsidedata -m 1 -v
231 $ hg -R ../sidedata-target debugsidedata a 0
232 $ hg -R ../sidedata-target debugsidedata a 1 -v
233 $ cd ..
234
235
236 Check sidedata exchange with on-the-fly generation and removal
237 ==============================================================
238
239 (Push) Target has strict superset of the source
240 -----------------------------------------------
241
242 $ hg init source-repo --config format.exp-use-side-data=yes
243 $ hg init target-repo --config format.exp-use-side-data=yes
244 $ cat << EOF >> target-repo/.hg/hgrc
245 > [extensions]
246 > testsidedata=$TESTDIR/testlib/ext-sidedata.py
247 > EOF
248 $ cd source-repo
249 $ echo aaa > a
250 $ hg add a
251 $ hg commit -m a
252 $ echo aaa > b
253 $ hg add b
254 $ hg commit -m b
255 $ echo xxx >> a
256 $ hg commit -m aa
257
258 No sidedata is generated in the source
259 $ hg debugsidedata -c 0
260
261 Check that sidedata capabilities are advertised
262 $ hg debugcapabilities ../target-repo | grep sidedata
263 exp-wanted-sidedata=1,2
264
265 We expect the client to abort the push since it's not capable of generating
266 what the server is asking for
267 $ hg push -r . ../target-repo
268 pushing to ../target-repo
269 abort: cannot push: required sidedata category not supported by this client: '1'
270 [255]
271
272 Add the required capabilities
273 $ cat << EOF >> .hg/hgrc
274 > [extensions]
275 > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
276 > EOF
277
278 We expect the target to have sidedata that was generated by the source on push
279 $ hg push -r . ../target-repo
280 pushing to ../target-repo
281 searching for changes
282 adding changesets
283 adding manifests
284 adding file changes
285 added 3 changesets with 3 changes to 2 files
286 $ cd ../target-repo
287 $ hg debugsidedata -c 0
288 2 sidedata entries
289 entry-0001 size 4
290 entry-0002 size 32
291 $ hg debugsidedata -c 1 -v
292 2 sidedata entries
293 entry-0001 size 4
294 '\x00\x00\x006'
295 entry-0002 size 32
296 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
297 $ hg debugsidedata -m 2
298 2 sidedata entries
299 entry-0001 size 4
300 entry-0002 size 32
301 $ hg debugsidedata a 1
302 2 sidedata entries
303 entry-0001 size 4
304 entry-0002 size 32
305 $ cd ..
306
307 (Push) Difference is not subset/superset
308 ----------------------------------------
309
310 Compared with the target, the source has one sidedata category in common, one
311 missing, and one extra.
312
313 $ rm -rf source-repo target-repo
314 $ hg init source-repo --config format.exp-use-side-data=yes
315 $ cat << EOF >> source-repo/.hg/hgrc
316 > [extensions]
317 > testsidedata3=$TESTDIR/testlib/ext-sidedata-3.py
318 > EOF
319 $ hg init target-repo --config format.exp-use-side-data=yes
320 $ cat << EOF >> target-repo/.hg/hgrc
321 > [extensions]
322 > testsidedata4=$TESTDIR/testlib/ext-sidedata-4.py
323 > EOF
324 $ cd source-repo
325 $ echo aaa > a
326 $ hg add a
327 $ hg commit -m a
328 $ echo aaa > b
329 $ hg add b
330 $ hg commit -m b
331 $ echo xxx >> a
332 $ hg commit -m aa
333
334 Check that sidedata capabilities are advertised
335 $ hg debugcapabilities . | grep sidedata
336 exp-wanted-sidedata=1,2
337 $ hg debugcapabilities ../target-repo | grep sidedata
338 exp-wanted-sidedata=2,3
339
340 Sidedata is generated in the source, but only the right categories (entry-0001 and entry-0002)
341 $ hg debugsidedata -c 0
342 2 sidedata entries
343 entry-0001 size 4
344 entry-0002 size 32
345 $ hg debugsidedata -c 1 -v
346 2 sidedata entries
347 entry-0001 size 4
348 '\x00\x00\x006'
349 entry-0002 size 32
350 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
351 $ hg debugsidedata -m 2
352 2 sidedata entries
353 entry-0001 size 4
354 entry-0002 size 32
355 $ hg debugsidedata a 1
356 2 sidedata entries
357 entry-0001 size 4
358 entry-0002 size 32
359
360
361 We expect the target to have the sidedata that was generated by the source on
362 push, with the categories the target does not support removed.
363 Namely, we expect entry-0002 (exchanged as-is) and entry-0003 (generated),
364 but not entry-0001.
365
366 $ hg push -r . ../target-repo --traceback
367 pushing to ../target-repo
368 searching for changes
369 adding changesets
370 adding manifests
371 adding file changes
372 added 3 changesets with 3 changes to 2 files
373 $ cd ../target-repo
374 $ hg log -G
375 o changeset: 2:40f977031323
376 | tag: tip
377 | user: test
378 | date: Thu Jan 01 00:00:00 1970 +0000
379 | summary: aa
380 |
381 o changeset: 1:2707720c6597
382 | user: test
383 | date: Thu Jan 01 00:00:00 1970 +0000
384 | summary: b
385 |
386 o changeset: 0:7049e48789d7
387 user: test
388 date: Thu Jan 01 00:00:00 1970 +0000
389 summary: a
390
391 $ hg debugsidedata -c 0
392 2 sidedata entries
393 entry-0002 size 32
394 entry-0003 size 48
395 $ hg debugsidedata -c 1 -v
396 2 sidedata entries
397 entry-0002 size 32
398 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
399 entry-0003 size 48
400 '\x87\xcf\xdfI/\xb5\xed\xeaC\xc1\xf0S\xf3X\x1c\xcc\x00m\xee\xe6#\xc1\xe3\xcaB8Fk\x82e\xfc\xc01\xf6\xb7\xb9\xb3([\xf6D\xa6\xcf\x9b\xea\x11{\x08'
401 $ hg debugsidedata -m 2
402 2 sidedata entries
403 entry-0002 size 32
404 entry-0003 size 48
405 $ hg debugsidedata a 1
406 2 sidedata entries
407 entry-0002 size 32
408 entry-0003 size 48
409 $ cd ..
410
411 (Pull) Target has strict superset of the source
412 -----------------------------------------------
413
414 $ rm -rf source-repo target-repo
415 $ hg init source-repo --config format.exp-use-side-data=yes
416 $ hg init target-repo --config format.exp-use-side-data=yes
417 $ cat << EOF >> target-repo/.hg/hgrc
418 > [extensions]
419 > testsidedata=$TESTDIR/testlib/ext-sidedata.py
420 > EOF
421 $ cd source-repo
422 $ echo aaa > a
423 $ hg add a
424 $ hg commit -m a
425 $ echo aaa > b
426 $ hg add b
427 $ hg commit -m b
428 $ echo xxx >> a
429 $ hg commit -m aa
430
431 No sidedata is generated in the source
432 $ hg debugsidedata -c 0
433
434 Check that sidedata capabilities are advertised
435 $ hg debugcapabilities ../target-repo | grep sidedata
436 exp-wanted-sidedata=1,2
437
438 $ cd ../target-repo
439
440 Add the required capabilities
441 $ cat << EOF >> .hg/hgrc
442 > [extensions]
443 > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
444 > EOF
445
446 We expect the target to have sidedata that it generated on-the-fly during pull
447 $ hg pull -r . ../source-repo --traceback
448 pulling from ../source-repo
449 adding changesets
450 adding manifests
451 adding file changes
452 added 3 changesets with 3 changes to 2 files
453 new changesets 7049e48789d7:40f977031323
454 (run 'hg update' to get a working copy)
455 $ hg debugsidedata -c 0 --traceback
456 2 sidedata entries
457 entry-0001 size 4
458 entry-0002 size 32
459 $ hg debugsidedata -c 1 -v --traceback
460 2 sidedata entries
461 entry-0001 size 4
462 '\x00\x00\x006'
463 entry-0002 size 32
464 '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
465 $ hg debugsidedata -m 2
466 2 sidedata entries
467 entry-0001 size 4
468 entry-0002 size 32
469 $ hg debugsidedata a 1
470 2 sidedata entries
471 entry-0001 size 4
472 entry-0002 size 32
473 $ cd ..
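A note on the values asserted throughout: following ext-sidedata-5.py (the next file in this changeset), entry-0001 holds the big-endian 32-bit length of the revision text and entry-0002 holds its raw sha256 digest, which is why they always show up as 4 and 32 bytes. Decoding one of the asserted values, for illustration:

    import struct

    # entry-0001 of changeset 1, as printed by `debugsidedata -c 1 -v` above
    (length,) = struct.unpack('>I', b'\x00\x00\x00:')
    print(length)  # 58: the byte length of that changelog revision's text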
@@ -0,0 +1,81 @@
1 # coding: utf8
2 # ext-sidedata-5.py - small extension to test (differently still) the sidedata
3 # logic
4 #
5 # Simulates a server for a simple sidedata exchange.
6 #
7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 #
9 # This software may be used and distributed according to the terms of the
10 # GNU General Public License version 2 or any later version.
11
12 from __future__ import absolute_import
13
14 import hashlib
15 import struct
16
17 from mercurial import (
18     extensions,
19     revlog,
20 )
21
22
23 from mercurial.revlogutils import sidedata as sidedatamod
24
25
26 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
27     sidedata = sidedata.copy()
28     if text is None:
29         text = revlog.revision(rev)
30     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
31     return sidedata
32
33
34 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
35     sidedata = sidedata.copy()
36     if text is None:
37         text = revlog.revision(rev)
38     sha256 = hashlib.sha256(text).digest()
39     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
40     return sidedata
41
42
43 def reposetup(ui, repo):
44     # Sidedata keys happen to be the same as the categories, easier for testing.
45     for kind in (b'changelog', b'manifest', b'filelog'):
46         repo.register_sidedata_computer(
47             kind,
48             sidedatamod.SD_TEST1,
49             (sidedatamod.SD_TEST1,),
50             compute_sidedata_1,
51         )
52         repo.register_sidedata_computer(
53             kind,
54             sidedatamod.SD_TEST2,
55             (sidedatamod.SD_TEST2,),
56             compute_sidedata_2,
57         )
58
59     # Declare the categories this repository wants on exchange; the
60     # computers registered above can generate them when needed.
61     repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
62     repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
63
64
65 def wrapaddrevision(
66     orig, self, text, transaction, link, p1, p2, *args, **kwargs
67 ):
68     if kwargs.get('sidedata') is None:
69         kwargs['sidedata'] = {}
70     sd = kwargs['sidedata']
71     # let's store some arbitrary data just for testing
72     # text length
73     sd[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
74     # and sha2 hashes
75     sha256 = hashlib.sha256(text).digest()
76     sd[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
77     return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
78
79
80 def extsetup(ui):
81     extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
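The registrations above are what the changegroup code below consumes: each computer is keyed by revlog kind and category and declares the set of categories it produces, while register_wanted_sidedata feeds the exp-wanted-sidedata capability checked in the tests. As a hedged sketch (illustrative names, not the actual revlog implementation), rewriting one revision's sidedata roughly amounts to:

    def rewrite_one(repo, rvlg, rev, sidedata, computers, to_remove):
        new_sidedata = dict(sidedata)
        for category in to_remove:
            # drop categories the other peer does not support
            new_sidedata.pop(category, None)
        for compute in computers:
            # computers use the signature seen above:
            # compute(repo, revlog, rev, sidedata, text=None)
            new_sidedata = compute(repo, rvlg, rev, new_sidedata)
        return new_sidedata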
@@ -1,1861 +1,1930 @@
1 # changegroup.py - Mercurial changegroup manipulation functions
1 # changegroup.py - Mercurial changegroup manipulation functions
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import os
11 import os
11 import struct
12 import struct
12 import weakref
13 import weakref
13
14
14 from .i18n import _
15 from .i18n import _
15 from .node import (
16 from .node import (
16 hex,
17 hex,
17 nullid,
18 nullid,
18 nullrev,
19 nullrev,
19 short,
20 short,
20 )
21 )
21 from .pycompat import open
22 from .pycompat import open
22
23
23 from . import (
24 from . import (
24 error,
25 error,
25 match as matchmod,
26 match as matchmod,
26 mdiff,
27 mdiff,
27 phases,
28 phases,
28 pycompat,
29 pycompat,
29 requirements,
30 requirements,
30 scmutil,
31 scmutil,
31 util,
32 util,
32 )
33 )
33
34
34 from .interfaces import repository
35 from .interfaces import repository
35 from .revlogutils import sidedata as sidedatamod
36 from .revlogutils import sidedata as sidedatamod
36
37
37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40
41
41 LFS_REQUIREMENT = b'lfs'
42 LFS_REQUIREMENT = b'lfs'
42
43
43 readexactly = util.readexactly
44 readexactly = util.readexactly
44
45
45
46
46 def getchunk(stream):
47 def getchunk(stream):
47 """return the next chunk from stream as a string"""
48 """return the next chunk from stream as a string"""
48 d = readexactly(stream, 4)
49 d = readexactly(stream, 4)
49 l = struct.unpack(b">l", d)[0]
50 l = struct.unpack(b">l", d)[0]
50 if l <= 4:
51 if l <= 4:
51 if l:
52 if l:
52 raise error.Abort(_(b"invalid chunk length %d") % l)
53 raise error.Abort(_(b"invalid chunk length %d") % l)
53 return b""
54 return b""
54 return readexactly(stream, l - 4)
55 return readexactly(stream, l - 4)
55
56
56
57
57 def chunkheader(length):
58 def chunkheader(length):
58 """return a changegroup chunk header (string)"""
59 """return a changegroup chunk header (string)"""
59 return struct.pack(b">l", length + 4)
60 return struct.pack(b">l", length + 4)
60
61
61
62
62 def closechunk():
63 def closechunk():
63 """return a changegroup chunk header (string) for a zero-length chunk"""
64 """return a changegroup chunk header (string) for a zero-length chunk"""
64 return struct.pack(b">l", 0)
65 return struct.pack(b">l", 0)
65
66
66
67
67 def _fileheader(path):
68 def _fileheader(path):
68 """Obtain a changegroup chunk header for a named path."""
69 """Obtain a changegroup chunk header for a named path."""
69 return chunkheader(len(path)) + path
70 return chunkheader(len(path)) + path
70
71
71
72
72 def writechunks(ui, chunks, filename, vfs=None):
73 def writechunks(ui, chunks, filename, vfs=None):
73 """Write chunks to a file and return its filename.
74 """Write chunks to a file and return its filename.
74
75
75 The stream is assumed to be a bundle file.
76 The stream is assumed to be a bundle file.
76 Existing files will not be overwritten.
77 Existing files will not be overwritten.
77 If no filename is specified, a temporary file is created.
78 If no filename is specified, a temporary file is created.
78 """
79 """
79 fh = None
80 fh = None
80 cleanup = None
81 cleanup = None
81 try:
82 try:
82 if filename:
83 if filename:
83 if vfs:
84 if vfs:
84 fh = vfs.open(filename, b"wb")
85 fh = vfs.open(filename, b"wb")
85 else:
86 else:
86 # Increase default buffer size because default is usually
87 # Increase default buffer size because default is usually
87 # small (4k is common on Linux).
88 # small (4k is common on Linux).
88 fh = open(filename, b"wb", 131072)
89 fh = open(filename, b"wb", 131072)
89 else:
90 else:
90 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 fh = os.fdopen(fd, "wb")
92 fh = os.fdopen(fd, "wb")
92 cleanup = filename
93 cleanup = filename
93 for c in chunks:
94 for c in chunks:
94 fh.write(c)
95 fh.write(c)
95 cleanup = None
96 cleanup = None
96 return filename
97 return filename
97 finally:
98 finally:
98 if fh is not None:
99 if fh is not None:
99 fh.close()
100 fh.close()
100 if cleanup is not None:
101 if cleanup is not None:
101 if filename and vfs:
102 if filename and vfs:
102 vfs.unlink(cleanup)
103 vfs.unlink(cleanup)
103 else:
104 else:
104 os.unlink(cleanup)
105 os.unlink(cleanup)
105
106
106
107
107 class cg1unpacker(object):
108 class cg1unpacker(object):
108 """Unpacker for cg1 changegroup streams.
109 """Unpacker for cg1 changegroup streams.
109
110
110 A changegroup unpacker handles the framing of the revision data in
111 A changegroup unpacker handles the framing of the revision data in
111 the wire format. Most consumers will want to use the apply()
112 the wire format. Most consumers will want to use the apply()
112 method to add the changes from the changegroup to a repository.
113 method to add the changes from the changegroup to a repository.
113
114
114 If you're forwarding a changegroup unmodified to another consumer,
115 If you're forwarding a changegroup unmodified to another consumer,
115 use getchunks(), which returns an iterator of changegroup
116 use getchunks(), which returns an iterator of changegroup
116 chunks. This is mostly useful for cases where you need to know the
117 chunks. This is mostly useful for cases where you need to know the
117 data stream has ended by observing the end of the changegroup.
118 data stream has ended by observing the end of the changegroup.
118
119
119 deltachunk() is useful only if you're applying delta data. Most
120 deltachunk() is useful only if you're applying delta data. Most
120 consumers should prefer apply() instead.
121 consumers should prefer apply() instead.
121
122
122 A few other public methods exist. Those are used only for
123 A few other public methods exist. Those are used only for
123 bundlerepo and some debug commands - their use is discouraged.
124 bundlerepo and some debug commands - their use is discouraged.
124 """
125 """
125
126
126 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 deltaheadersize = deltaheader.size
128 deltaheadersize = deltaheader.size
128 version = b'01'
129 version = b'01'
129 _grouplistcount = 1 # One list of files after the manifests
130 _grouplistcount = 1 # One list of files after the manifests
130
131
131 def __init__(self, fh, alg, extras=None):
132 def __init__(self, fh, alg, extras=None):
132 if alg is None:
133 if alg is None:
133 alg = b'UN'
134 alg = b'UN'
134 if alg not in util.compengines.supportedbundletypes:
135 if alg not in util.compengines.supportedbundletypes:
135 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 if alg == b'BZ':
137 if alg == b'BZ':
137 alg = b'_truncatedBZ'
138 alg = b'_truncatedBZ'
138
139
139 compengine = util.compengines.forbundletype(alg)
140 compengine = util.compengines.forbundletype(alg)
140 self._stream = compengine.decompressorreader(fh)
141 self._stream = compengine.decompressorreader(fh)
141 self._type = alg
142 self._type = alg
142 self.extras = extras or {}
143 self.extras = extras or {}
143 self.callback = None
144 self.callback = None
144
145
145 # These methods (compressed, read, seek, tell) all appear to only
146 # These methods (compressed, read, seek, tell) all appear to only
146 # be used by bundlerepo, but it's a little hard to tell.
147 # be used by bundlerepo, but it's a little hard to tell.
147 def compressed(self):
148 def compressed(self):
148 return self._type is not None and self._type != b'UN'
149 return self._type is not None and self._type != b'UN'
149
150
150 def read(self, l):
151 def read(self, l):
151 return self._stream.read(l)
152 return self._stream.read(l)
152
153
153 def seek(self, pos):
154 def seek(self, pos):
154 return self._stream.seek(pos)
155 return self._stream.seek(pos)
155
156
156 def tell(self):
157 def tell(self):
157 return self._stream.tell()
158 return self._stream.tell()
158
159
159 def close(self):
160 def close(self):
160 return self._stream.close()
161 return self._stream.close()
161
162
162 def _chunklength(self):
163 def _chunklength(self):
163 d = readexactly(self._stream, 4)
164 d = readexactly(self._stream, 4)
164 l = struct.unpack(b">l", d)[0]
165 l = struct.unpack(b">l", d)[0]
165 if l <= 4:
166 if l <= 4:
166 if l:
167 if l:
167 raise error.Abort(_(b"invalid chunk length %d") % l)
168 raise error.Abort(_(b"invalid chunk length %d") % l)
168 return 0
169 return 0
169 if self.callback:
170 if self.callback:
170 self.callback()
171 self.callback()
171 return l - 4
172 return l - 4
172
173
173 def changelogheader(self):
174 def changelogheader(self):
174 """v10 does not have a changelog header chunk"""
175 """v10 does not have a changelog header chunk"""
175 return {}
176 return {}
176
177
177 def manifestheader(self):
178 def manifestheader(self):
178 """v10 does not have a manifest header chunk"""
179 """v10 does not have a manifest header chunk"""
179 return {}
180 return {}
180
181
181 def filelogheader(self):
182 def filelogheader(self):
182 """return the header of the filelogs chunk, v10 only has the filename"""
183 """return the header of the filelogs chunk, v10 only has the filename"""
183 l = self._chunklength()
184 l = self._chunklength()
184 if not l:
185 if not l:
185 return {}
186 return {}
186 fname = readexactly(self._stream, l)
187 fname = readexactly(self._stream, l)
187 return {b'filename': fname}
188 return {b'filename': fname}
188
189
189 def _deltaheader(self, headertuple, prevnode):
190 def _deltaheader(self, headertuple, prevnode):
190 node, p1, p2, cs = headertuple
191 node, p1, p2, cs = headertuple
191 if prevnode is None:
192 if prevnode is None:
192 deltabase = p1
193 deltabase = p1
193 else:
194 else:
194 deltabase = prevnode
195 deltabase = prevnode
195 flags = 0
196 flags = 0
196 return node, p1, p2, deltabase, cs, flags
197 return node, p1, p2, deltabase, cs, flags
197
198
198 def deltachunk(self, prevnode):
199 def deltachunk(self, prevnode):
199 l = self._chunklength()
200 l = self._chunklength()
200 if not l:
201 if not l:
201 return {}
202 return {}
202 headerdata = readexactly(self._stream, self.deltaheadersize)
203 headerdata = readexactly(self._stream, self.deltaheadersize)
203 header = self.deltaheader.unpack(headerdata)
204 header = self.deltaheader.unpack(headerdata)
204 delta = readexactly(self._stream, l - self.deltaheadersize)
205 delta = readexactly(self._stream, l - self.deltaheadersize)
205 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 # cg4 forward-compat
207 # cg4 forward-compat
207 sidedata = {}
208 sidedata = {}
208 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209
210
210 def getchunks(self):
211 def getchunks(self):
211 """returns all the chunks contains in the bundle
212 """returns all the chunks contains in the bundle
212
213
213 Used when you need to forward the binary stream to a file or another
214 Used when you need to forward the binary stream to a file or another
214 network API. To do so, it parse the changegroup data, otherwise it will
215 network API. To do so, it parse the changegroup data, otherwise it will
215 block in case of sshrepo because it don't know the end of the stream.
216 block in case of sshrepo because it don't know the end of the stream.
216 """
217 """
217 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 # filelogs.
221 # filelogs.
221 #
222 #
222 # Changelog and manifestlog parts are terminated with empty chunks. The
223 # Changelog and manifestlog parts are terminated with empty chunks. The
223 # tree and file parts are a list of entry sections. Each entry section
224 # tree and file parts are a list of entry sections. Each entry section
224 # is a series of chunks terminating in an empty chunk. The list of these
225 # is a series of chunks terminating in an empty chunk. The list of these
225 # entry sections is terminated in yet another empty chunk, so we know
226 # entry sections is terminated in yet another empty chunk, so we know
226 # we've reached the end of the tree/file list when we reach an empty
227 # we've reached the end of the tree/file list when we reach an empty
227 # chunk that was proceeded by no non-empty chunks.
228 # chunk that was proceeded by no non-empty chunks.
228
229
229 parts = 0
230 parts = 0
230 while parts < 2 + self._grouplistcount:
231 while parts < 2 + self._grouplistcount:
231 noentries = True
232 noentries = True
232 while True:
233 while True:
233 chunk = getchunk(self)
234 chunk = getchunk(self)
234 if not chunk:
235 if not chunk:
235 # The first two empty chunks represent the end of the
236 # The first two empty chunks represent the end of the
236 # changelog and the manifestlog portions. The remaining
237 # changelog and the manifestlog portions. The remaining
237 # empty chunks represent either A) the end of individual
238 # empty chunks represent either A) the end of individual
238 # tree or file entries in the file list, or B) the end of
239 # tree or file entries in the file list, or B) the end of
239 # the entire list. It's the end of the entire list if there
240 # the entire list. It's the end of the entire list if there
240 # were no entries (i.e. noentries is True).
241 # were no entries (i.e. noentries is True).
241 if parts < 2:
242 if parts < 2:
242 parts += 1
243 parts += 1
243 elif noentries:
244 elif noentries:
244 parts += 1
245 parts += 1
245 break
246 break
246 noentries = False
247 noentries = False
247 yield chunkheader(len(chunk))
248 yield chunkheader(len(chunk))
248 pos = 0
249 pos = 0
249 while pos < len(chunk):
250 while pos < len(chunk):
250 next = pos + 2 ** 20
251 next = pos + 2 ** 20
251 yield chunk[pos:next]
252 yield chunk[pos:next]
252 pos = next
253 pos = next
253 yield closechunk()
254 yield closechunk()
254
255
255 def _unpackmanifests(self, repo, revmap, trp, prog):
256 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
256 self.callback = prog.increment
257 self.callback = prog.increment
257 # no need to check for empty manifest group here:
258 # no need to check for empty manifest group here:
258 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 # no new manifest will be created and the manifest group will
260 # no new manifest will be created and the manifest group will
260 # be empty during the pull
261 # be empty during the pull
261 self.manifestheader()
262 self.manifestheader()
262 deltas = self.deltaiter()
263 deltas = self.deltaiter()
263 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
264 storage = repo.manifestlog.getstorage(b'')
265 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
264 prog.complete()
266 prog.complete()
265 self.callback = None
267 self.callback = None
266
268
267 def apply(
269 def apply(
268 self,
270 self,
269 repo,
271 repo,
270 tr,
272 tr,
271 srctype,
273 srctype,
272 url,
274 url,
273 targetphase=phases.draft,
275 targetphase=phases.draft,
274 expectedtotal=None,
276 expectedtotal=None,
275 sidedata_categories=None,
277 sidedata_categories=None,
276 ):
278 ):
277 """Add the changegroup returned by source.read() to this repo.
279 """Add the changegroup returned by source.read() to this repo.
278 srctype is a string like 'push', 'pull', or 'unbundle'. url is
280 srctype is a string like 'push', 'pull', or 'unbundle'. url is
279 the URL of the repo where this changegroup is coming from.
281 the URL of the repo where this changegroup is coming from.
280
282
281 Return an integer summarizing the change to this repo:
283 Return an integer summarizing the change to this repo:
282 - nothing changed or no source: 0
284 - nothing changed or no source: 0
283 - more heads than before: 1+added heads (2..n)
285 - more heads than before: 1+added heads (2..n)
284 - fewer heads than before: -1-removed heads (-2..-n)
286 - fewer heads than before: -1-removed heads (-2..-n)
285 - number of heads stays the same: 1
287 - number of heads stays the same: 1
286
288
287 `sidedata_categories` is an optional set of the remote's sidedata wanted
289 `sidedata_categories` is an optional set of the remote's sidedata wanted
288 categories.
290 categories.
289 """
291 """
290 repo = repo.unfiltered()
292 repo = repo.unfiltered()
291
293
292 # Only useful if we're adding sidedata categories. If both peers have
294 # Only useful if we're adding sidedata categories. If both peers have
293 # the same categories, then we simply don't do anything.
295 # the same categories, then we simply don't do anything.
294 if self.version == b'04' and srctype == b'pull':
296 if self.version == b'04' and srctype == b'pull':
295 sidedata_helpers = get_sidedata_helpers(
297 sidedata_helpers = get_sidedata_helpers(
296 repo,
298 repo,
297 sidedata_categories or set(),
299 sidedata_categories or set(),
298 pull=True,
300 pull=True,
299 )
301 )
300 else:
302 else:
301 sidedata_helpers = None
303 sidedata_helpers = None
302
304
303 def csmap(x):
305 def csmap(x):
304 repo.ui.debug(b"add changeset %s\n" % short(x))
306 repo.ui.debug(b"add changeset %s\n" % short(x))
305 return len(cl)
307 return len(cl)
306
308
307 def revmap(x):
309 def revmap(x):
308 return cl.rev(x)
310 return cl.rev(x)
309
311
310 try:
312 try:
311 # The transaction may already carry source information. In this
313 # The transaction may already carry source information. In this
312 # case we use the top level data. We overwrite the argument
314 # case we use the top level data. We overwrite the argument
313 # because we need to use the top level value (if they exist)
315 # because we need to use the top level value (if they exist)
314 # in this function.
316 # in this function.
315 srctype = tr.hookargs.setdefault(b'source', srctype)
317 srctype = tr.hookargs.setdefault(b'source', srctype)
316 tr.hookargs.setdefault(b'url', url)
318 tr.hookargs.setdefault(b'url', url)
317 repo.hook(
319 repo.hook(
318 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
320 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
319 )
321 )
320
322
321 # write changelog data to temp files so concurrent readers
323 # write changelog data to temp files so concurrent readers
322 # will not see an inconsistent view
324 # will not see an inconsistent view
323 cl = repo.changelog
325 cl = repo.changelog
324 cl.delayupdate(tr)
326 cl.delayupdate(tr)
325 oldheads = set(cl.heads())
327 oldheads = set(cl.heads())
326
328
327 trp = weakref.proxy(tr)
329 trp = weakref.proxy(tr)
328 # pull off the changeset group
330 # pull off the changeset group
329 repo.ui.status(_(b"adding changesets\n"))
331 repo.ui.status(_(b"adding changesets\n"))
330 clstart = len(cl)
332 clstart = len(cl)
331 progress = repo.ui.makeprogress(
333 progress = repo.ui.makeprogress(
332 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
334 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
333 )
335 )
334 self.callback = progress.increment
336 self.callback = progress.increment
335
337
336 efilesset = set()
338 efilesset = set()
337 duprevs = []
339 duprevs = []
338
340
339 def ondupchangelog(cl, rev):
341 def ondupchangelog(cl, rev):
340 if rev < clstart:
342 if rev < clstart:
341 duprevs.append(rev)
343 duprevs.append(rev)
342
344
343 def onchangelog(cl, rev):
345 def onchangelog(cl, rev):
344 ctx = cl.changelogrevision(rev)
346 ctx = cl.changelogrevision(rev)
345 efilesset.update(ctx.files)
347 efilesset.update(ctx.files)
346 repo.register_changeset(rev, ctx)
348 repo.register_changeset(rev, ctx)
347
349
348 self.changelogheader()
350 self.changelogheader()
349 deltas = self.deltaiter()
351 deltas = self.deltaiter()
350 if not cl.addgroup(
352 if not cl.addgroup(
351 deltas,
353 deltas,
352 csmap,
354 csmap,
353 trp,
355 trp,
354 alwayscache=True,
356 alwayscache=True,
355 addrevisioncb=onchangelog,
357 addrevisioncb=onchangelog,
356 duplicaterevisioncb=ondupchangelog,
358 duplicaterevisioncb=ondupchangelog,
357 ):
359 ):
358 repo.ui.develwarn(
360 repo.ui.develwarn(
359 b'applied empty changelog from changegroup',
361 b'applied empty changelog from changegroup',
360 config=b'warn-empty-changegroup',
362 config=b'warn-empty-changegroup',
361 )
363 )
362 efiles = len(efilesset)
364 efiles = len(efilesset)
363 clend = len(cl)
365 clend = len(cl)
364 changesets = clend - clstart
366 changesets = clend - clstart
365 progress.complete()
367 progress.complete()
366 del deltas
368 del deltas
367 # TODO Python 2.7 removal
369 # TODO Python 2.7 removal
368 # del efilesset
370 # del efilesset
369 efilesset = None
371 efilesset = None
370 self.callback = None
372 self.callback = None
371
373
374 # Keep track of the (non-changelog) revlogs we've updated and their
375 # range of new revisions for sidedata rewrite.
376 # TODO do something more efficient than keeping the reference to
377 # the revlogs, especially memory-wise.
378 touched_manifests = {}
379 touched_filelogs = {}
380
372 # pull off the manifest group
381 # pull off the manifest group
373 repo.ui.status(_(b"adding manifests\n"))
382 repo.ui.status(_(b"adding manifests\n"))
374 # We know that we'll never have more manifests than we had
383 # We know that we'll never have more manifests than we had
375 # changesets.
384 # changesets.
376 progress = repo.ui.makeprogress(
385 progress = repo.ui.makeprogress(
377 _(b'manifests'), unit=_(b'chunks'), total=changesets
386 _(b'manifests'), unit=_(b'chunks'), total=changesets
378 )
387 )
379 self._unpackmanifests(repo, revmap, trp, progress)
388 on_manifest_rev = None
389 if sidedata_helpers and b'manifest' in sidedata_helpers[1]:
390
391 def on_manifest_rev(manifest, rev):
392 range = touched_manifests.get(manifest)
393 if not range:
394 touched_manifests[manifest] = (rev, rev)
395 else:
396 assert rev == range[1] + 1
397 touched_manifests[manifest] = (range[0], rev)
398
399 self._unpackmanifests(
400 repo,
401 revmap,
402 trp,
403 progress,
404 addrevisioncb=on_manifest_rev,
405 )
380
406
381 needfiles = {}
407 needfiles = {}
382 if repo.ui.configbool(b'server', b'validate'):
408 if repo.ui.configbool(b'server', b'validate'):
383 cl = repo.changelog
409 cl = repo.changelog
384 ml = repo.manifestlog
410 ml = repo.manifestlog
385 # validate incoming csets have their manifests
411 # validate incoming csets have their manifests
386 for cset in pycompat.xrange(clstart, clend):
412 for cset in pycompat.xrange(clstart, clend):
387 mfnode = cl.changelogrevision(cset).manifest
413 mfnode = cl.changelogrevision(cset).manifest
388 mfest = ml[mfnode].readdelta()
414 mfest = ml[mfnode].readdelta()
389 # store file nodes we must see
415 # store file nodes we must see
390 for f, n in pycompat.iteritems(mfest):
416 for f, n in pycompat.iteritems(mfest):
391 needfiles.setdefault(f, set()).add(n)
417 needfiles.setdefault(f, set()).add(n)
392
418
419 on_filelog_rev = None
420 if sidedata_helpers and b'filelog' in sidedata_helpers[1]:
421
422 def on_filelog_rev(filelog, rev):
423 range = touched_filelogs.get(filelog)
424 if not range:
425 touched_filelogs[filelog] = (rev, rev)
426 else:
427 assert rev == range[1] + 1
428 touched_filelogs[filelog] = (range[0], rev)
429
393 # process the files
430 # process the files
394 repo.ui.status(_(b"adding file changes\n"))
431 repo.ui.status(_(b"adding file changes\n"))
395 newrevs, newfiles = _addchangegroupfiles(
432 newrevs, newfiles = _addchangegroupfiles(
396 repo, self, revmap, trp, efiles, needfiles
433 repo,
434 self,
435 revmap,
436 trp,
437 efiles,
438 needfiles,
439 addrevisioncb=on_filelog_rev,
397 )
440 )
398
441
442 if sidedata_helpers:
443 if b'changelog' in sidedata_helpers[1]:
444 cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
445 for mf, (startrev, endrev) in touched_manifests.items():
446 mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
447 for fl, (startrev, endrev) in touched_filelogs.items():
448 fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
449
399 # making sure the value exists
450 # making sure the value exists
400 tr.changes.setdefault(b'changegroup-count-changesets', 0)
451 tr.changes.setdefault(b'changegroup-count-changesets', 0)
401 tr.changes.setdefault(b'changegroup-count-revisions', 0)
452 tr.changes.setdefault(b'changegroup-count-revisions', 0)
402 tr.changes.setdefault(b'changegroup-count-files', 0)
453 tr.changes.setdefault(b'changegroup-count-files', 0)
403 tr.changes.setdefault(b'changegroup-count-heads', 0)
454 tr.changes.setdefault(b'changegroup-count-heads', 0)
404
455
405 # some code use bundle operation for internal purpose. They usually
456 # some code use bundle operation for internal purpose. They usually
406 # set `ui.quiet` to do this outside of user sight. Size the report
457 # set `ui.quiet` to do this outside of user sight. Size the report
407 # of such operation now happens at the end of the transaction, that
458 # of such operation now happens at the end of the transaction, that
408 # ui.quiet has not direct effect on the output.
459 # ui.quiet has not direct effect on the output.
409 #
460 #
410 # To preserve this intend use an inelegant hack, we fail to report
461 # To preserve this intend use an inelegant hack, we fail to report
411 # the change if `quiet` is set. We should probably move to
462 # the change if `quiet` is set. We should probably move to
412 # something better, but this is a good first step to allow the "end
463 # something better, but this is a good first step to allow the "end
413 # of transaction report" to pass tests.
464 # of transaction report" to pass tests.
414 if not repo.ui.quiet:
465 if not repo.ui.quiet:
415 tr.changes[b'changegroup-count-changesets'] += changesets
466 tr.changes[b'changegroup-count-changesets'] += changesets
416 tr.changes[b'changegroup-count-revisions'] += newrevs
467 tr.changes[b'changegroup-count-revisions'] += newrevs
417 tr.changes[b'changegroup-count-files'] += newfiles
468 tr.changes[b'changegroup-count-files'] += newfiles
418
469
419 deltaheads = 0
470 deltaheads = 0
420 if oldheads:
471 if oldheads:
421 heads = cl.heads()
472 heads = cl.heads()
422 deltaheads += len(heads) - len(oldheads)
473 deltaheads += len(heads) - len(oldheads)
423 for h in heads:
474 for h in heads:
424 if h not in oldheads and repo[h].closesbranch():
475 if h not in oldheads and repo[h].closesbranch():
425 deltaheads -= 1
476 deltaheads -= 1
426
477
427 # see previous comment about checking ui.quiet
478 # see previous comment about checking ui.quiet
428 if not repo.ui.quiet:
479 if not repo.ui.quiet:
429 tr.changes[b'changegroup-count-heads'] += deltaheads
480 tr.changes[b'changegroup-count-heads'] += deltaheads
430 repo.invalidatevolatilesets()
481 repo.invalidatevolatilesets()
431
482
432 if changesets > 0:
483 if changesets > 0:
433 if b'node' not in tr.hookargs:
484 if b'node' not in tr.hookargs:
434 tr.hookargs[b'node'] = hex(cl.node(clstart))
485 tr.hookargs[b'node'] = hex(cl.node(clstart))
435 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
486 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
436 hookargs = dict(tr.hookargs)
487 hookargs = dict(tr.hookargs)
437 else:
488 else:
438 hookargs = dict(tr.hookargs)
489 hookargs = dict(tr.hookargs)
439 hookargs[b'node'] = hex(cl.node(clstart))
490 hookargs[b'node'] = hex(cl.node(clstart))
440 hookargs[b'node_last'] = hex(cl.node(clend - 1))
491 hookargs[b'node_last'] = hex(cl.node(clend - 1))
441 repo.hook(
492 repo.hook(
442 b'pretxnchangegroup',
493 b'pretxnchangegroup',
443 throw=True,
494 throw=True,
444 **pycompat.strkwargs(hookargs)
495 **pycompat.strkwargs(hookargs)
445 )
496 )
446
497
447 added = pycompat.xrange(clstart, clend)
498 added = pycompat.xrange(clstart, clend)
448 phaseall = None
499 phaseall = None
449 if srctype in (b'push', b'serve'):
500 if srctype in (b'push', b'serve'):
450 # Old servers can not push the boundary themselves.
501 # Old servers can not push the boundary themselves.
451 # New servers won't push the boundary if changeset already
502 # New servers won't push the boundary if changeset already
452 # exists locally as secret
503 # exists locally as secret
453 #
504 #
454 # We should not use added here but the list of all change in
505 # We should not use added here but the list of all change in
455 # the bundle
506 # the bundle
456 if repo.publishing():
507 if repo.publishing():
457 targetphase = phaseall = phases.public
508 targetphase = phaseall = phases.public
458 else:
509 else:
459 # closer target phase computation
510 # closer target phase computation
460
511
461 # Those changesets have been pushed from the
512 # Those changesets have been pushed from the
462 # outside, their phases are going to be pushed
513 # outside, their phases are going to be pushed
463 # alongside. Therefor `targetphase` is
514 # alongside. Therefor `targetphase` is
464 # ignored.
515 # ignored.
465 targetphase = phaseall = phases.draft
516 targetphase = phaseall = phases.draft
466 if added:
517 if added:
467 phases.registernew(repo, tr, targetphase, added)
518 phases.registernew(repo, tr, targetphase, added)
468 if phaseall is not None:
519 if phaseall is not None:
469 if duprevs:
520 if duprevs:
470 duprevs.extend(added)
521 duprevs.extend(added)
471 else:
522 else:
472 duprevs = added
523 duprevs = added
473 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
524 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
474 duprevs = []
525 duprevs = []
475
526
476 if changesets > 0:
527 if changesets > 0:
477
528
478 def runhooks(unused_success):
529 def runhooks(unused_success):
479 # These hooks run when the lock releases, not when the
530 # These hooks run when the lock releases, not when the
480 # transaction closes. So it's possible for the changelog
531 # transaction closes. So it's possible for the changelog
481 # to have changed since we last saw it.
532 # to have changed since we last saw it.
482 if clstart >= len(repo):
533 if clstart >= len(repo):
483 return
534 return
484
535
485 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
536 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
486
537
487 for rev in added:
538 for rev in added:
488 args = hookargs.copy()
539 args = hookargs.copy()
489 args[b'node'] = hex(cl.node(rev))
540 args[b'node'] = hex(cl.node(rev))
490 del args[b'node_last']
541 del args[b'node_last']
491 repo.hook(b"incoming", **pycompat.strkwargs(args))
542 repo.hook(b"incoming", **pycompat.strkwargs(args))
492
543
493 newheads = [h for h in repo.heads() if h not in oldheads]
544 newheads = [h for h in repo.heads() if h not in oldheads]
494 repo.ui.log(
545 repo.ui.log(
495 b"incoming",
546 b"incoming",
496 b"%d incoming changes - new heads: %s\n",
547 b"%d incoming changes - new heads: %s\n",
497 len(added),
548 len(added),
498 b', '.join([hex(c[:6]) for c in newheads]),
549 b', '.join([hex(c[:6]) for c in newheads]),
499 )
550 )
500
551
501 tr.addpostclose(
552 tr.addpostclose(
502 b'changegroup-runhooks-%020i' % clstart,
553 b'changegroup-runhooks-%020i' % clstart,
503 lambda tr: repo._afterlock(runhooks),
554 lambda tr: repo._afterlock(runhooks),
504 )
555 )
505 finally:
556 finally:
506 repo.ui.flush()
557 repo.ui.flush()
507 # never return 0 here:
558 # never return 0 here:
508 if deltaheads < 0:
559 if deltaheads < 0:
509 ret = deltaheads - 1
560 ret = deltaheads - 1
510 else:
561 else:
511 ret = deltaheads + 1
562 ret = deltaheads + 1
512 return ret
563 return ret
513
564
514 def deltaiter(self):
565 def deltaiter(self):
515 """
566 """
516 returns an iterator of the deltas in this changegroup
567 returns an iterator of the deltas in this changegroup
517
568
518 Useful for passing to the underlying storage system to be stored.
569 Useful for passing to the underlying storage system to be stored.
519 """
570 """
520 chain = None
571 chain = None
521 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
572 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
522 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
573 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
523 yield chunkdata
574 yield chunkdata
524 chain = chunkdata[0]
575 chain = chunkdata[0]
525
576
526
577
527 class cg2unpacker(cg1unpacker):
578 class cg2unpacker(cg1unpacker):
528 """Unpacker for cg2 streams.
579 """Unpacker for cg2 streams.
529
580
530 cg2 streams add support for generaldelta, so the delta header
581 cg2 streams add support for generaldelta, so the delta header
531 format is slightly different. All other features about the data
582 format is slightly different. All other features about the data
532 remain the same.
583 remain the same.
533 """
584 """
534
585
535 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
586 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
536 deltaheadersize = deltaheader.size
587 deltaheadersize = deltaheader.size
537 version = b'02'
588 version = b'02'
538
589
539 def _deltaheader(self, headertuple, prevnode):
590 def _deltaheader(self, headertuple, prevnode):
540 node, p1, p2, deltabase, cs = headertuple
591 node, p1, p2, deltabase, cs = headertuple
541 flags = 0
592 flags = 0
542 return node, p1, p2, deltabase, cs, flags
593 return node, p1, p2, deltabase, cs, flags
543
594
544
595
545 class cg3unpacker(cg2unpacker):
596 class cg3unpacker(cg2unpacker):
546 """Unpacker for cg3 streams.
597 """Unpacker for cg3 streams.
547
598
548 cg3 streams add support for exchanging treemanifests and revlog
599 cg3 streams add support for exchanging treemanifests and revlog
549 flags. It adds the revlog flags to the delta header and an empty chunk
600 flags. It adds the revlog flags to the delta header and an empty chunk
550 separating manifests and files.
601 separating manifests and files.
551 """
602 """
552
603
553 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
604 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
554 deltaheadersize = deltaheader.size
605 deltaheadersize = deltaheader.size
555 version = b'03'
606 version = b'03'
556 _grouplistcount = 2 # One list of manifests and one list of files
607 _grouplistcount = 2 # One list of manifests and one list of files
557
608
558 def _deltaheader(self, headertuple, prevnode):
609 def _deltaheader(self, headertuple, prevnode):
559 node, p1, p2, deltabase, cs, flags = headertuple
610 node, p1, p2, deltabase, cs, flags = headertuple
560 return node, p1, p2, deltabase, cs, flags
611 return node, p1, p2, deltabase, cs, flags
561
612
562 def _unpackmanifests(self, repo, revmap, trp, prog):
613 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
563 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
614 super(cg3unpacker, self)._unpackmanifests(
615 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
616 )
564 for chunkdata in iter(self.filelogheader, {}):
617 for chunkdata in iter(self.filelogheader, {}):
565 # If we get here, there are directory manifests in the changegroup
618 # If we get here, there are directory manifests in the changegroup
566 d = chunkdata[b"filename"]
619 d = chunkdata[b"filename"]
567 repo.ui.debug(b"adding %s revisions\n" % d)
620 repo.ui.debug(b"adding %s revisions\n" % d)
568 deltas = self.deltaiter()
621 deltas = self.deltaiter()
569 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
622 if not repo.manifestlog.getstorage(d).addgroup(
623 deltas, revmap, trp, addrevisioncb=addrevisioncb
624 ):
570 raise error.Abort(_(b"received dir revlog group is empty"))
625 raise error.Abort(_(b"received dir revlog group is empty"))
571
626
572
627
573 class cg4unpacker(cg3unpacker):
628 class cg4unpacker(cg3unpacker):
574 """Unpacker for cg4 streams.
629 """Unpacker for cg4 streams.
575
630
576 cg4 streams add support for exchanging sidedata.
631 cg4 streams add support for exchanging sidedata.
577 """
632 """
578
633
579 version = b'04'
634 version = b'04'
580
635
581 def deltachunk(self, prevnode):
636 def deltachunk(self, prevnode):
582 res = super(cg4unpacker, self).deltachunk(prevnode)
637 res = super(cg4unpacker, self).deltachunk(prevnode)
583 if not res:
638 if not res:
584 return res
639 return res
585
640
586 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
641 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
587
642
588 sidedata_raw = getchunk(self._stream)
643 sidedata_raw = getchunk(self._stream)
589 sidedata = {}
644 sidedata = {}
590 if len(sidedata_raw) > 0:
645 if len(sidedata_raw) > 0:
591 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
646 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
592
647
593 return node, p1, p2, cs, deltabase, delta, flags, sidedata
648 return node, p1, p2, cs, deltabase, delta, flags, sidedata
594
649
595
650
596 class headerlessfixup(object):
651 class headerlessfixup(object):
597 def __init__(self, fh, h):
652 def __init__(self, fh, h):
598 self._h = h
653 self._h = h
599 self._fh = fh
654 self._fh = fh
600
655
601 def read(self, n):
656 def read(self, n):
602 if self._h:
657 if self._h:
603 d, self._h = self._h[:n], self._h[n:]
658 d, self._h = self._h[:n], self._h[n:]
604 if len(d) < n:
659 if len(d) < n:
605 d += readexactly(self._fh, n - len(d))
660 d += readexactly(self._fh, n - len(d))
606 return d
661 return d
607 return readexactly(self._fh, n)
662 return readexactly(self._fh, n)
608
663
609
664
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data

    sidedata = delta.sidedata
    if sidedata is not None:
        # Need a separate chunk for sidedata to be able to differentiate
        # "raw delta" length and sidedata length
        yield chunkheader(len(sidedata))
        yield sidedata


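# Wire layout produced above for a single revision (sketch; cg4 appends the
# trailing sidedata chunk, earlier versions stop after the data):
#
#   <chunkheader len(meta)+len(prefix)+len(data)>
#   <meta>     delta header built by headerfn
#   <prefix>   empty for true deltas; an invented full-replace diff header
#              when a full revision has to masquerade as a delta
#   <data>     the delta (or full revision) bytes
#   <chunkheader len(sidedata)> <sidedata>     # cg4 only

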
def _sortnodesellipsis(store, nodes, cl, lookup):
    """Sort nodes for changegroup generation."""
    # Ellipses serving mode.
    #
    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    key = lambda n: cl.rev(lookup(n))
    return sorted(nodes, key=key)


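# Ordering sketch: for a manifest or filelog node n, lookup(n) returns the
# changelog node that introduced it, so the sort key is simply the revnum of
# the introducing changeset; e.g. nodes introduced by changelog revs 7, 3
# and 5 come back ordered as (3, 5, 7) regardless of their local revnums.

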
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b"unable to resolve parent while packing '%s' %r"
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode


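# Worked example (hypothetical numbers): if the ellipsis entry for linkrev 12
# recorded linkparents {5, 9} and clrevtolocalrev already maps 9 -> 3 for
# this revlog, local(9) returns 3 immediately, while local(5) walks further
# up through fullclnodes/precomputedellipsis until it hits a changelog rev
# that touched this revlog; the two results are then sorted into (p1, p2)
# before being converted to nodes.

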
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
    sidedata_helpers=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).

    See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _("""config "devel.bundle.delta" has unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
        sidedata_helpers=sidedata_helpers,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()


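# Config sketch: assuming forcedeltaparentprev is False, the devel-only knob
# read above maps onto delta modes as follows:
#
#   [devel]
#   bundle.delta =        ->  CG_DELTAMODE_STD   (let the storage decide)
#   bundle.delta = p1     ->  CG_DELTAMODE_P1    (delta against first parent)
#   bundle.delta = full   ->  CG_DELTAMODE_FULL  (send full revisions)

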
class cgpacker(object):
    def __init__(
        self,
        repo,
        oldmatcher,
        matcher,
        version,
        builddeltaheader,
        manifestsend,
        forcedeltaparentprev=False,
        bundlecaps=None,
        ellipses=False,
        shallow=False,
        ellipsisroots=None,
        fullnodes=None,
        remote_sidedata=None,
    ):
        """Given a source repo, construct a bundler.

        oldmatcher is a matcher that matches on files the client already has.
        These will not be included in the changegroup.

        matcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.

        remote_sidedata is the set of sidedata categories wanted by the remote.
        """
        assert oldmatcher
        assert matcher
        self._oldmatcher = oldmatcher
        self._matcher = matcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        if remote_sidedata is None:
            remote_sidedata = set()
        self._remote_sidedata = remote_sidedata
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.

        If changelog is False, changelog data won't be added to the
        changegroup.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        sidedata_helpers = None
        if self.version == b'04':
            remote_sidedata = self._remote_sidedata
            if source == b'strip':
                # We're our own remote when stripping, get the no-op helpers
                # TODO a better approach would be for the strip bundle to
                # correctly advertise its sidedata categories directly.
                remote_sidedata = repo._wanted_sidedata
            sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)

        clstate, deltas = self._generatechangelog(
            cl,
            clnodes,
            generate=changelog,
            sidedata_helpers=sidedata_helpers,
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield close

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
            sidedata_helpers=sidedata_helpers,
        )

        for tree, deltas in it:
            if tree:
                assert self.version in (b'03', b'04')
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
            sidedata_helpers=sidedata_helpers,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)

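    # Stream shape emitted by generate() (sketch):
    #
    #   [changelog delta chunks] <close>
    #   per tree: (<tree header if tree != b''> [manifest delta chunks] <close>)
    #   <manifestsend>
    #   per file: (<file header> [file delta chunks] <close>)
    #   <close>
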
    def _generatechangelog(
        self, cl, nodes, generate=True, sidedata_helpers=None
    ):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        If generate is False, the state will be fully populated and no chunk
        stream will be yielded.

        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):
                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
            sidedata_helpers=sidedata_helpers,
        )

        return state, gen

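    # Consumption-order sketch (hypothetical caller): the returned state is
    # only trustworthy once the chunk iterator has been drained, e.g.:
    #
    #   clstate, deltas = packer._generatechangelog(cl, clnodes)
    #   for delta in deltas:
    #       pass  # serialize each delta
    #   manifests = clstate[b'manifests']  # fully populated only at this point
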
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
        sidedata_helpers=None,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent based on pulls vs pushes, etc.

        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
        """
        repo = self._repo
        mfl = repo.manifestlog
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__
                )  # pytype: disable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []

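    # Traversal sketch: tmfnodes starts as {b'': <root manifest nodes>}; as
    # the root group is consumed, lookupmflinknode() discovers b't' entries
    # and files them under keys such as b'dir/' and b'dir/sub/', so the
    # while-loop keeps popping trees until every reachable dirlog has been
    # considered.
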
    def _prunemanifests(self, store, nodes, commonrevs):
        if not self._ellipses:
            # In the non-ellipses case and in large repositories, it is
            # better to avoid calling store.rev and store.linkrev on a lot
            # of nodes than to send some extra data.
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
        sidedata_helpers=None,
    ):
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)

            # Look up filenodes; we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
            )

            yield fname, deltas

        progress.complete()


def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


1563 def _makecg4packer(
1618 def _makecg4packer(
1564 repo,
1619 repo,
1565 oldmatcher,
1620 oldmatcher,
1566 matcher,
1621 matcher,
1567 bundlecaps,
1622 bundlecaps,
1568 ellipses=False,
1623 ellipses=False,
1569 shallow=False,
1624 shallow=False,
1570 ellipsisroots=None,
1625 ellipsisroots=None,
1571 fullnodes=None,
1626 fullnodes=None,
1572 remote_sidedata=None,
1627 remote_sidedata=None,
1573 ):
1628 ):
1574 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1629 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1575 # differenciate "raw delta" and sidedata.
1630 # differenciate "raw delta" and sidedata.
1631     builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1632         d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1633     )
1634
1635     return cgpacker(
1636         repo,
1637         oldmatcher,
1638         matcher,
1639         b'04',
1640         builddeltaheader=builddeltaheader,
1641         manifestsend=closechunk(),
1642         bundlecaps=bundlecaps,
1643         ellipses=ellipses,
1644         shallow=shallow,
1645         ellipsisroots=ellipsisroots,
1646         fullnodes=fullnodes,
1647         remote_sidedata=remote_sidedata,
1648     )
1649
1650
1651 _packermap = {
1652     b'01': (_makecg1packer, cg1unpacker),
1653     # cg2 adds support for exchanging generaldelta
1654     b'02': (_makecg2packer, cg2unpacker),
1655     # cg3 adds support for exchanging revlog flags and treemanifests
1656     b'03': (_makecg3packer, cg3unpacker),
1657     # cg4 adds support for exchanging sidedata
1658     b'04': (_makecg4packer, cg4unpacker),
1659 }
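# A hedged usage sketch (added for exposition, not part of the original
# module): each _packermap entry pairs a packer factory with an unpacker
# class, so picking a changegroup version is a single dict lookup. `repo`,
# `oldmatcher`, `matcher` and `fh` stand in for a local repository, two
# match objects, and a file handle on a changegroup stream.
#
#     makepacker, unpacker = _packermap[b'03']
#     packer = makepacker(repo, oldmatcher, matcher, bundlecaps=None)
#     cg = unpacker(fh, b'UN')  # b'UN' selects the "no compression" codec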
1660
1661
1662 def allsupportedversions(repo):
1663     versions = set(_packermap.keys())
1664     needv03 = False
1665     if (
1666         repo.ui.configbool(b'experimental', b'changegroup3')
1667         or repo.ui.configbool(b'experimental', b'treemanifest')
1668         or scmutil.istreemanifest(repo)
1669     ):
1670         # we keep version 03 because we need it to exchange treemanifest data
1671         #
1672         # we also keep versions 01 and 02, because it is possible for a repo
1673         # to contain both normal and tree manifests at the same time, so
1674         # using an older version to pull data remains viable
1675         #
1676         # (or even to push a subset of history)
1677         needv03 = True
1678     has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1679     if not has_revlogv2:
1680         versions.discard(b'04')
1681     if not needv03:
1682         versions.discard(b'03')
1683     return versions
1684
1685
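# Hedged illustration (added for exposition): the caller negotiates a
# common version by intersecting both sides' supported sets, so discarding
# b'03'/b'04' above directly narrows what can be negotiated. With plain
# byte-string version tokens:
#
#     local_versions = {b'01', b'02', b'03', b'04'}
#     remote_versions = {b'01', b'02', b'03'}
#     assert max(local_versions & remote_versions) == b'03'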
1686 # Changegroup versions that can be applied to the repo
1687 def supportedincomingversions(repo):
1688     return allsupportedversions(repo)
1689
1690
1691 # Changegroup versions that can be created from the repo
1692 def supportedoutgoingversions(repo):
1693     versions = allsupportedversions(repo)
1694     if scmutil.istreemanifest(repo):
1695         # Versions 01 and 02 support only flat manifests and it's just too
1696         # expensive to convert between the flat manifest and tree manifest on
1697         # the fly. Since tree manifests are hashed differently, all of history
1698         # would have to be converted. Instead, we simply don't even pretend to
1699         # support versions 01 and 02.
1700         versions.discard(b'01')
1701         versions.discard(b'02')
1702     if requirements.NARROW_REQUIREMENT in repo.requirements:
1703         # Versions 01 and 02 don't support revlog flags, and we need to
1704         # support that for stripping and unbundling to work.
1705         versions.discard(b'01')
1706         versions.discard(b'02')
1707     if LFS_REQUIREMENT in repo.requirements:
1708         # Versions 01 and 02 don't support revlog flags, and we need to
1709         # mark LFS entries with REVIDX_EXTSTORED.
1710         versions.discard(b'01')
1711         versions.discard(b'02')
1712
1713     return versions
1714
1715
1716 def localversion(repo):
1717     # Finds the best version to use for bundles that are meant to be used
1718     # locally, such as those from strip and shelve, and temporary bundles.
1719     return max(supportedoutgoingversions(repo))
1720
1721
1722 def safeversion(repo):
1723     # Finds the smallest version that it's safe to assume clients of the repo
1724     # will support. For example, all hg versions that support generaldelta also
1725     # support changegroup 02.
1726     versions = supportedoutgoingversions(repo)
1727     if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1728         versions.discard(b'01')
1729     assert versions
1730     return min(versions)
1731
1732
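# The two helpers above make opposite choices over the same set, which is
# worth spelling out (illustrative values, not from the original source):
#
#     versions = {b'01', b'02', b'03'}
#     max(versions)  # b'03' -- localversion: richest format, for local use
#     min(versions)  # b'01' -- safeversion: lowest common denominator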
1733 def getbundler(
1734     version,
1735     repo,
1736     bundlecaps=None,
1737     oldmatcher=None,
1738     matcher=None,
1739     ellipses=False,
1740     shallow=False,
1741     ellipsisroots=None,
1742     fullnodes=None,
1743     remote_sidedata=None,
1744 ):
1745     assert version in supportedoutgoingversions(repo)
1746
1747     if matcher is None:
1748         matcher = matchmod.always()
1749     if oldmatcher is None:
1750         oldmatcher = matchmod.never()
1751
1752     if version == b'01' and not matcher.always():
1753         raise error.ProgrammingError(
1754             b'version 01 changegroups do not support sparse file matchers'
1755         )
1756
1757     if ellipses and version in (b'01', b'02'):
1758         raise error.Abort(
1759             _(
1760                 b'ellipsis nodes require at least cg3 on client and server, '
1761                 b'but negotiated version %s'
1762             )
1763             % version
1764         )
1765
1766     # Requested files could include files not in the local store. So
1767     # filter those out.
1768     matcher = repo.narrowmatch(matcher)
1769
1770     fn = _packermap[version][0]
1771     return fn(
1772         repo,
1773         oldmatcher,
1774         matcher,
1775         bundlecaps,
1776         ellipses=ellipses,
1777         shallow=shallow,
1778         ellipsisroots=ellipsisroots,
1779         fullnodes=fullnodes,
1780         remote_sidedata=remote_sidedata,
1781     )
1782
1783
1784 def getunbundler(version, fh, alg, extras=None):
1785     return _packermap[version][1](fh, alg, extras=extras)
1786
1787
1788 def _changegroupinfo(repo, nodes, source):
1789     if repo.ui.verbose or source == b'bundle':
1790         repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1791     if repo.ui.debugflag:
1792         repo.ui.debug(b"list of changesets:\n")
1793         for node in nodes:
1794             repo.ui.debug(b"%s\n" % hex(node))
1795
1796
1797 def makechangegroup(
1798     repo, outgoing, version, source, fastpath=False, bundlecaps=None
1799 ):
1800     cgstream = makestream(
1801         repo,
1802         outgoing,
1803         version,
1804         source,
1805         fastpath=fastpath,
1806         bundlecaps=bundlecaps,
1807     )
1808     return getunbundler(
1809         version,
1810         util.chunkbuffer(cgstream),
1811         None,
1812         {b'clcount': len(outgoing.missing)},
1813     )
1814
1815
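# Hedged usage sketch (assumed names, not in the original module): `repo`
# is a localrepository and `outgoing` a discovery.outgoing instance
# describing the missing changesets; the result is an unbundler ready to be
# applied locally or written out.
#
#     cg = makechangegroup(repo, outgoing, b'02', b'strip')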
1816 def makestream(
1817     repo,
1818     outgoing,
1819     version,
1820     source,
1821     fastpath=False,
1822     bundlecaps=None,
1823     matcher=None,
1824     remote_sidedata=None,
1825 ):
1826     bundler = getbundler(
1827         version,
1828         repo,
1829         bundlecaps=bundlecaps,
1830         matcher=matcher,
1831         remote_sidedata=remote_sidedata,
1832     )
1833
1834     repo = repo.unfiltered()
1835     commonrevs = outgoing.common
1836     csets = outgoing.missing
1837     heads = outgoing.ancestorsof
1838     # We go through the fast path if we get told to, or if all (unfiltered)
1839     # heads have been requested (since we then know that all linkrevs will
1840     # be pulled by the client).
1841     heads.sort()
1842     fastpathlinkrev = fastpath or (
1843         repo.filtername is None and heads == sorted(repo.heads())
1844     )
1845
1846     repo.hook(b'preoutgoing', throw=True, source=source)
1847     _changegroupinfo(repo, csets, source)
1848     return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1849
1850
1851 def _addchangegroupfiles(
1852     repo,
1853     source,
1854     revmap,
1855     trp,
1856     expectedfiles,
1857     needfiles,
1858     addrevisioncb=None,
1859 ):
1860     revisions = 0
1861     files = 0
1862     progress = repo.ui.makeprogress(
1863         _(b'files'), unit=_(b'files'), total=expectedfiles
1864     )
1865     for chunkdata in iter(source.filelogheader, {}):
1866         files += 1
1867         f = chunkdata[b"filename"]
1868         repo.ui.debug(b"adding %s revisions\n" % f)
1869         progress.increment()
1870         fl = repo.file(f)
1871         o = len(fl)
1872         try:
1873             deltas = source.deltaiter()
1874             added = fl.addgroup(
1875                 deltas,
1876                 revmap,
1877                 trp,
1878                 addrevisioncb=addrevisioncb,
1879             )
1880             if not added:
1881                 raise error.Abort(_(b"received file revlog group is empty"))
1882         except error.CensoredBaseError as e:
1883             raise error.Abort(_(b"received delta base is censored: %s") % e)
1884         revisions += len(fl) - o
1885         if f in needfiles:
1886             needs = needfiles[f]
1887             for new in pycompat.xrange(o, len(fl)):
1888                 n = fl.node(new)
1889                 if n in needs:
1890                     needs.remove(n)
1891                 else:
1892                     raise error.Abort(_(b"received spurious file revlog entry"))
1893             if not needs:
1894                 del needfiles[f]
1895     progress.complete()
1896
1897     for f, needs in pycompat.iteritems(needfiles):
1898         fl = repo.file(f)
1899         for n in needs:
1900             try:
1901                 fl.rev(n)
1902             except error.LookupError:
1903                 raise error.Abort(
1904                     _(b'missing file data for %s:%s - run hg verify')
1905                     % (f, hex(n))
1906                 )
1907
1908     return revisions, files
1909
1910
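# A hedged sketch of the new `addrevisioncb` hook threaded through above.
# Assumption (check revlog.addgroup for the authoritative signature): at
# this point in the API the callback receives the revlog and the newly
# added revision number, which is how the on-the-fly sidedata-rewriting
# code hooks in.
#
#     def log_added(rl, rev):
#         print('added rev %d' % rev)
#
#     _addchangegroupfiles(repo, source, revmap, trp, expectedfiles,
#                          needfiles, addrevisioncb=log_added)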
1911 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
1912     # Computers for computing sidedata on-the-fly
1913     sd_computers = collections.defaultdict(list)
1914     # Computers for categories to remove from sidedata
1915     sd_removers = collections.defaultdict(list)
1916
1917     to_generate = remote_sd_categories - repo._wanted_sidedata
1918     to_remove = repo._wanted_sidedata - remote_sd_categories
1919     if pull:
1920         to_generate, to_remove = to_remove, to_generate
1921
1922     for revlog_kind, computers in repo._sidedata_computers.items():
1923         for category, computer in computers.items():
1924             if category in to_generate:
1925                 sd_computers[revlog_kind].append(computer)
1926             if category in to_remove:
1927                 sd_removers[revlog_kind].append(computer)
1928
1929     sidedata_helpers = (repo, sd_computers, sd_removers)
1930     return sidedata_helpers
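# The selection above is plain set arithmetic; a self-contained
# illustration with made-up category tokens (the real categories are
# sidedata constants):
#
#     wanted_locally = {b'copies', b'checks'}
#     wanted_remotely = {b'checks', b'extra'}
#     to_generate = wanted_remotely - wanted_locally   # {b'extra'}
#     to_remove = wanted_locally - wanted_remotely     # {b'copies'}
#     # on pull the roles swap, because the data now flows toward us:
#     to_generate, to_remove = to_remove, to_generate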
@@ -1,3207 +1,3258 b''
1 # revlog.py - storage back-end for mercurial
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
13
14 from __future__ import absolute_import
15
16 import collections
17 import contextlib
18 import errno
19 import io
20 import os
21 import struct
22 import zlib
23
24 # import stuff from node for others to import from revlog
25 from .node import (
26     bin,
27     hex,
28     nullhex,
29     nullid,
30     nullrev,
31     short,
32     wdirfilenodeids,
33     wdirhex,
34     wdirid,
35     wdirrev,
36 )
37 from .i18n import _
38 from .pycompat import getattr
39 from .revlogutils.constants import (
40     FLAG_GENERALDELTA,
41     FLAG_INLINE_DATA,
42     REVLOGV0,
43     REVLOGV1,
44     REVLOGV1_FLAGS,
45     REVLOGV2,
46     REVLOGV2_FLAGS,
47     REVLOG_DEFAULT_FLAGS,
48     REVLOG_DEFAULT_FORMAT,
49     REVLOG_DEFAULT_VERSION,
50 )
51 from .revlogutils.flagutil import (
52     REVIDX_DEFAULT_FLAGS,
53     REVIDX_ELLIPSIS,
54     REVIDX_EXTSTORED,
55     REVIDX_FLAGS_ORDER,
56     REVIDX_HASCOPIESINFO,
57     REVIDX_ISCENSORED,
58     REVIDX_RAWTEXT_CHANGING_FLAGS,
59     REVIDX_SIDEDATA,
60 )
61 from .thirdparty import attr
62 from . import (
63     ancestor,
64     dagop,
65     error,
66     mdiff,
67     policy,
68     pycompat,
69     templatefilters,
70     util,
71 )
72 from .interfaces import (
73     repository,
74     util as interfaceutil,
75 )
76 from .revlogutils import (
77     deltas as deltautil,
78     flagutil,
79     nodemap as nodemaputil,
80     sidedata as sidedatautil,
81 )
82 from .utils import (
83     storageutil,
84     stringutil,
85 )
86 from .pure import parsers as pureparsers
87
88 # blanked usage of all the names to prevent pyflakes constraints
89 # We need these names available in the module for extensions.
90 REVLOGV0
91 REVLOGV1
92 REVLOGV2
93 FLAG_INLINE_DATA
94 FLAG_GENERALDELTA
95 REVLOG_DEFAULT_FLAGS
96 REVLOG_DEFAULT_FORMAT
97 REVLOG_DEFAULT_VERSION
98 REVLOGV1_FLAGS
99 REVLOGV2_FLAGS
100 REVIDX_ISCENSORED
101 REVIDX_ELLIPSIS
102 REVIDX_SIDEDATA
103 REVIDX_HASCOPIESINFO
104 REVIDX_EXTSTORED
105 REVIDX_DEFAULT_FLAGS
106 REVIDX_FLAGS_ORDER
107 REVIDX_RAWTEXT_CHANGING_FLAGS
108
109 parsers = policy.importmod('parsers')
110 rustancestor = policy.importrust('ancestor')
111 rustdagop = policy.importrust('dagop')
112 rustrevlog = policy.importrust('revlog')
113
114 # Aliased for performance.
115 _zlibdecompress = zlib.decompress
116
117 # max size of revlog with inline data
118 _maxinline = 131072
119 _chunksize = 1048576
120
121 # Flag processors for REVIDX_ELLIPSIS.
122 def ellipsisreadprocessor(rl, text):
123     return text, False
124
125
126 def ellipsiswriteprocessor(rl, text):
127     return text, False
128
129
130 def ellipsisrawprocessor(rl, text):
131     return False
132
133
134 ellipsisprocessor = (
135     ellipsisreadprocessor,
136     ellipsiswriteprocessor,
137     ellipsisrawprocessor,
138 )
139
140
141 def getoffset(q):
142     return int(q >> 16)
143
144
145 def gettype(q):
146     return int(q & 0xFFFF)
147
148
149 def offset_type(offset, type):
150     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151         raise ValueError(b'unknown revlog index flags')
152     return int(int(offset) << 16 | type)
153
154
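# Illustration added for exposition: the index packs a 48-bit offset and
# 16-bit flags into one integer, so getoffset/gettype simply undo the shift
# applied by offset_type. (These asserts are a sketch, not original code.)
#
#     packed = offset_type(1024, 0)
#     assert getoffset(packed) == 1024
#     assert gettype(packed) == 0
#     assert packed == 1024 << 16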
155 def _verify_revision(rl, skipflags, state, node):
156     """Verify the integrity of the given revlog ``node`` while providing a hook
157     point for extensions to influence the operation."""
158     if skipflags:
159         state[b'skipread'].add(node)
160     else:
161         # Side-effect: read content and verify hash.
162         rl.revision(node)
163
164
165 # True if a fast implementation for persistent-nodemap is available
166 #
167 # We also consider that we have a "fast" implementation in "pure" python
168 # because people using pure don't really have performance considerations
169 # (and a wheelbarrow of other slowness sources)
170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171     parsers, 'BaseIndexObject'
172 )
173
174
175 @attr.s(slots=True, frozen=True)
176 class _revisioninfo(object):
177     """Information about a revision that allows building its fulltext
178     node:       expected hash of the revision
179     p1, p2:     parent revs of the revision
180     btext:      built text cache consisting of a one-element list
181     cachedelta: (baserev, uncompressed_delta) or None
182     flags:      flags associated to the revision storage
183
184     One of btext[0] or cachedelta must be set.
185     """
186
187     node = attr.ib()
188     p1 = attr.ib()
189     p2 = attr.ib()
190     btext = attr.ib()
191     textlen = attr.ib()
192     cachedelta = attr.ib()
193     flags = attr.ib()
194
195
196 @interfaceutil.implementer(repository.irevisiondelta)
197 @attr.s(slots=True)
198 class revlogrevisiondelta(object):
199     node = attr.ib()
200     p1node = attr.ib()
201     p2node = attr.ib()
202     basenode = attr.ib()
203     flags = attr.ib()
204     baserevisionsize = attr.ib()
205     revision = attr.ib()
206     delta = attr.ib()
207     sidedata = attr.ib()
208     linknode = attr.ib(default=None)
209
210
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
214     warning = attr.ib(default=None)
215     error = attr.ib(default=None)
216     node = attr.ib(default=None)
217
218
219 # index v0:
220 #  4 bytes: offset
221 #  4 bytes: compressed length
222 #  4 bytes: base rev
223 #  4 bytes: link rev
224 # 20 bytes: parent 1 nodeid
225 # 20 bytes: parent 2 nodeid
226 # 20 bytes: nodeid
227 indexformatv0 = struct.Struct(b">4l20s20s20s")
228 indexformatv0_pack = indexformatv0.pack
229 indexformatv0_unpack = indexformatv0.unpack
230
231
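# Size check added for exposition: the layout documented above is four
# 4-byte big-endian longs plus three 20-byte nodeids, i.e. 76 bytes per
# v0 entry.
#
#     import struct
#     assert struct.calcsize(b">4l20s20s20s") == 76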
232 class revlogoldindex(list):
233     @property
234     def nodemap(self):
235         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
236         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
237         return self._nodemap
238
239     @util.propertycache
240     def _nodemap(self):
241         nodemap = nodemaputil.NodeMap({nullid: nullrev})
242         for r in range(0, len(self)):
243             n = self[r][7]
244             nodemap[n] = r
245         return nodemap
246
247     def has_node(self, node):
248         """return True if the node exists in the index"""
249         return node in self._nodemap
250
251     def rev(self, node):
252         """return a revision for a node
253
254         If the node is unknown, raise a RevlogError"""
255         return self._nodemap[node]
256
257     def get_rev(self, node):
258         """return a revision for a node
259
260         If the node is unknown, return None"""
261         return self._nodemap.get(node)
262
263     def append(self, tup):
264         self._nodemap[tup[7]] = len(self)
265         super(revlogoldindex, self).append(tup)
266
267     def __delitem__(self, i):
268         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
269             raise ValueError(b"deleting slices only supports a:-1 with step 1")
270         for r in pycompat.xrange(i.start, len(self)):
271             del self._nodemap[self[r][7]]
272         super(revlogoldindex, self).__delitem__(i)
273
274     def clearcaches(self):
275         self.__dict__.pop('_nodemap', None)
276
277     def __getitem__(self, i):
278         if i == -1:
279             return (0, 0, 0, -1, -1, -1, -1, nullid)
280         return list.__getitem__(self, i)
281
282
283 class revlogoldio(object):
284     def __init__(self):
285         self.size = indexformatv0.size
286
287     def parseindex(self, data, inline):
288         s = self.size
289         index = []
290         nodemap = nodemaputil.NodeMap({nullid: nullrev})
291         n = off = 0
292         l = len(data)
293         while off + s <= l:
294             cur = data[off : off + s]
295             off += s
296             e = indexformatv0_unpack(cur)
297             # transform to revlogv1 format
298             e2 = (
299                 offset_type(e[0], 0),
300                 e[1],
301                 -1,
302                 e[2],
303                 e[3],
304                 nodemap.get(e[4], nullrev),
305                 nodemap.get(e[5], nullrev),
306                 e[6],
307             )
308             index.append(e2)
309             nodemap[e[6]] = n
310             n += 1
311
312         index = revlogoldindex(index)
313         return index, None
314
315     def packentry(self, entry, node, version, rev):
316         if gettype(entry[0]):
317             raise error.RevlogError(
318                 _(b'index entry flags need revlog version 1')
319             )
320         e2 = (
321             getoffset(entry[0]),
322             entry[1],
323             entry[3],
324             entry[4],
325             node(entry[5]),
326             node(entry[6]),
327             entry[7],
328         )
329         return indexformatv0_pack(*e2)
330
331
332 # index ng:
333 #  6 bytes: offset
334 #  2 bytes: flags
335 #  4 bytes: compressed length
336 #  4 bytes: uncompressed length
337 #  4 bytes: base rev
338 #  4 bytes: link rev
339 #  4 bytes: parent 1 rev
340 #  4 bytes: parent 2 rev
341 # 32 bytes: nodeid
342 indexformatng = struct.Struct(b">Qiiiiii20s12x")
343 indexformatng_pack = indexformatng.pack
344 versionformat = struct.Struct(b">I")
345 versionformat_pack = versionformat.pack
346 versionformat_unpack = versionformat.unpack
347
348 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
349 # signed integer)
350 _maxentrysize = 0x7FFFFFFF
351
352
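# Corresponding check for the v1 ("ng") layout above: one 8-byte field, six
# 4-byte ints, a 20-byte hash plus 12 pad bytes make a fixed 64-byte record,
# which is what makes O(1) index addressing possible. (Exposition only.)
#
#     import struct
#     assert struct.calcsize(b">Qiiiiii20s12x") == 64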
353 class revlogio(object):
354     def __init__(self):
355         self.size = indexformatng.size
356
357     def parseindex(self, data, inline):
358         # call the C implementation to parse the index data
359         index, cache = parsers.parse_index2(data, inline)
360         return index, cache
361
362     def packentry(self, entry, node, version, rev):
363         p = indexformatng_pack(*entry)
364         if rev == 0:
365             p = versionformat_pack(version) + p[4:]
366         return p
367
368
369 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
370 indexformatv2_pack = indexformatv2.pack
371
372
373 class revlogv2io(object):
374     def __init__(self):
375         self.size = indexformatv2.size
376
377     def parseindex(self, data, inline):
378         index, cache = parsers.parse_index2(data, inline, revlogv2=True)
379         return index, cache
380
381     def packentry(self, entry, node, version, rev):
382         p = indexformatv2_pack(*entry)
383         if rev == 0:
384             p = versionformat_pack(version) + p[4:]
385         return p
386
387
388 NodemapRevlogIO = None
389
390 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
391
392     class NodemapRevlogIO(revlogio):
393         """A debug oriented IO class that returns a PersistentNodeMapIndexObject
394
395         The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
396         """
397
398         def parseindex(self, data, inline):
399             index, cache = parsers.parse_index_devel_nodemap(data, inline)
400             return index, cache
401
402
403 class rustrevlogio(revlogio):
404     def parseindex(self, data, inline):
405         index, cache = super(rustrevlogio, self).parseindex(data, inline)
406         return rustrevlog.MixedIndex(index), cache
407
408
409 class revlog(object):
410     """
411     the underlying revision storage object
412
413     A revlog consists of two parts, an index and the revision data.
414
415     The index is a file with a fixed record size containing
416     information on each revision, including its nodeid (hash), the
417     nodeids of its parents, the position and offset of its data within
418     the data file, and the revision it's based on. Finally, each entry
419     contains a linkrev entry that can serve as a pointer to external
420     data.
421
422     The revision data itself is a linear collection of data chunks.
423     Each chunk represents a revision and is usually represented as a
424     delta against the previous chunk. To bound lookup time, runs of
425     deltas are limited to about 2 times the length of the original
426     version data. This makes retrieval of a version proportional to
427     its size, or O(1) relative to the number of revisions.
428
429     Both pieces of the revlog are written to in an append-only
430     fashion, which means we never need to rewrite a file to insert or
431     remove data, and can use some simple techniques to avoid the need
432     for locking while reading.
433
434     If checkambig, indexfile is opened with checkambig=True at
435     writing, to avoid file stat ambiguity.
436
437     If mmaplargeindex is True, and an mmapindexthreshold is set, the
438     index will be mmapped rather than read if it is larger than the
439     configured threshold.
440
441     If censorable is True, the revlog can have censored revisions.
442
443     If `upperboundcomp` is not None, this is the expected maximal gain from
444     compression for the data content.
445
446     `concurrencychecker` is an optional function that receives 3 arguments: a
447     file handle, a filename, and an expected position. It should check whether
448     the current position in the file handle is valid, and log/warn/fail (by
449     raising).
450     """
451
452     _flagserrorclass = error.RevlogError
453
454     def __init__(
455         self,
456         opener,
457         indexfile,
458         datafile=None,
459         checkambig=False,
460         mmaplargeindex=False,
461         censorable=False,
462         upperboundcomp=None,
463         persistentnodemap=False,
464         concurrencychecker=None,
465     ):
466         """
467         create a revlog object
468
469         opener is a function that abstracts the file opening operation
470         and can be used to implement COW semantics or the like.
471
472         """
473         self.upperboundcomp = upperboundcomp
474         self.indexfile = indexfile
475         self.datafile = datafile or (indexfile[:-2] + b".d")
476         self.nodemap_file = None
477         if persistentnodemap:
478             self.nodemap_file = nodemaputil.get_nodemap_file(
479                 opener, self.indexfile
480             )
481
482         self.opener = opener
483         #  When True, indexfile is opened with checkambig=True at writing, to
484         #  avoid file stat ambiguity.
485         self._checkambig = checkambig
486         self._mmaplargeindex = mmaplargeindex
487         self._censorable = censorable
488         # 3-tuple of (node, rev, text) for a raw revision.
489         self._revisioncache = None
490         # Maps rev to chain base rev.
491         self._chainbasecache = util.lrucachedict(100)
492         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
493         self._chunkcache = (0, b'')
494         # How much data to read and cache into the raw revlog data cache.
495         self._chunkcachesize = 65536
496         self._maxchainlen = None
497         self._deltabothparents = True
498         self.index = None
499         self._nodemap_docket = None
500         # Mapping of partial identifiers to full nodes.
501         self._pcache = {}
502         # Mapping of revision integer to full node.
503         self._compengine = b'zlib'
504         self._compengineopts = {}
505         self._maxdeltachainspan = -1
506         self._withsparseread = False
507         self._sparserevlog = False
508         self._srdensitythreshold = 0.50
509         self._srmingapsize = 262144
510
511         # Make copy of flag processors so each revlog instance can support
512         # custom flags.
513         self._flagprocessors = dict(flagutil.flagprocessors)
514
515         # 2-tuple of file handles being used for active writing.
516         self._writinghandles = None
517
518         self._loadindex()
519
520         self._concurrencychecker = concurrencychecker
521
522     def _loadindex(self):
523         mmapindexthreshold = None
524         opts = self.opener.options
525
526         if b'revlogv2' in opts:
527             newversionflags = REVLOGV2 | FLAG_INLINE_DATA
528         elif b'revlogv1' in opts:
529             newversionflags = REVLOGV1 | FLAG_INLINE_DATA
530             if b'generaldelta' in opts:
531                 newversionflags |= FLAG_GENERALDELTA
532         elif b'revlogv0' in self.opener.options:
533             newversionflags = REVLOGV0
534         else:
535             newversionflags = REVLOG_DEFAULT_VERSION
536
537         if b'chunkcachesize' in opts:
538             self._chunkcachesize = opts[b'chunkcachesize']
539         if b'maxchainlen' in opts:
540             self._maxchainlen = opts[b'maxchainlen']
541         if b'deltabothparents' in opts:
542             self._deltabothparents = opts[b'deltabothparents']
543         self._lazydelta = bool(opts.get(b'lazydelta', True))
544         self._lazydeltabase = False
545         if self._lazydelta:
546             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
547         if b'compengine' in opts:
548             self._compengine = opts[b'compengine']
549         if b'zlib.level' in opts:
550             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
551         if b'zstd.level' in opts:
552             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
553         if b'maxdeltachainspan' in opts:
554             self._maxdeltachainspan = opts[b'maxdeltachainspan']
555         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
556             mmapindexthreshold = opts[b'mmapindexthreshold']
557         self.hassidedata = bool(opts.get(b'side-data', False))
558         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
559         withsparseread = bool(opts.get(b'with-sparse-read', False))
560         # sparse-revlog forces sparse-read
561         self._withsparseread = self._sparserevlog or withsparseread
562         if b'sparse-read-density-threshold' in opts:
563             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
564         if b'sparse-read-min-gap-size' in opts:
565             self._srmingapsize = opts[b'sparse-read-min-gap-size']
566         if opts.get(b'enableellipsis'):
567             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
568
569         # revlog v0 doesn't have flag processors
570         for flag, processor in pycompat.iteritems(
571             opts.get(b'flagprocessors', {})
572         ):
573             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
574
575         if self._chunkcachesize <= 0:
576             raise error.RevlogError(
577                 _(b'revlog chunk cache size %r is not greater than 0')
578                 % self._chunkcachesize
579             )
580         elif self._chunkcachesize & (self._chunkcachesize - 1):
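            # A power of two has exactly one bit set, so subtracting one
            # inverts that bit and everything below it; the AND is therefore
            # zero only for powers of two (65536 & 65535 == 0, while
            # 65000 & 64999 != 0).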
581             raise error.RevlogError(
582                 _(b'revlog chunk cache size %r is not a power of 2')
583                 % self._chunkcachesize
584             )
585
585
586 indexdata = b''
586 indexdata = b''
587 self._initempty = True
587 self._initempty = True
588 try:
588 try:
589 with self._indexfp() as f:
589 with self._indexfp() as f:
590 if (
590 if (
591 mmapindexthreshold is not None
591 mmapindexthreshold is not None
592 and self.opener.fstat(f).st_size >= mmapindexthreshold
592 and self.opener.fstat(f).st_size >= mmapindexthreshold
593 ):
593 ):
594 # TODO: should .close() to release resources without
594 # TODO: should .close() to release resources without
595 # relying on Python GC
595 # relying on Python GC
596 indexdata = util.buffer(util.mmapread(f))
596 indexdata = util.buffer(util.mmapread(f))
597 else:
597 else:
598 indexdata = f.read()
598 indexdata = f.read()
599 if len(indexdata) > 0:
599 if len(indexdata) > 0:
600 versionflags = versionformat_unpack(indexdata[:4])[0]
600 versionflags = versionformat_unpack(indexdata[:4])[0]
601 self._initempty = False
601 self._initempty = False
602 else:
602 else:
603 versionflags = newversionflags
603 versionflags = newversionflags
604 except IOError as inst:
604 except IOError as inst:
605 if inst.errno != errno.ENOENT:
605 if inst.errno != errno.ENOENT:
606 raise
606 raise
607
607
608 versionflags = newversionflags
608 versionflags = newversionflags
609
609
610 self.version = versionflags
610 self.version = versionflags
611
611
612 flags = versionflags & ~0xFFFF
612 flags = versionflags & ~0xFFFF
613 fmt = versionflags & 0xFFFF
613 fmt = versionflags & 0xFFFF
614
614
615 if fmt == REVLOGV0:
615 if fmt == REVLOGV0:
616 if flags:
616 if flags:
617 raise error.RevlogError(
617 raise error.RevlogError(
618 _(b'unknown flags (%#04x) in version %d revlog %s')
618 _(b'unknown flags (%#04x) in version %d revlog %s')
619 % (flags >> 16, fmt, self.indexfile)
619 % (flags >> 16, fmt, self.indexfile)
620 )
620 )
621
621
622 self._inline = False
622 self._inline = False
623 self._generaldelta = False
623 self._generaldelta = False
624
624
625 elif fmt == REVLOGV1:
625 elif fmt == REVLOGV1:
626 if flags & ~REVLOGV1_FLAGS:
626 if flags & ~REVLOGV1_FLAGS:
627 raise error.RevlogError(
627 raise error.RevlogError(
628 _(b'unknown flags (%#04x) in version %d revlog %s')
628 _(b'unknown flags (%#04x) in version %d revlog %s')
629 % (flags >> 16, fmt, self.indexfile)
629 % (flags >> 16, fmt, self.indexfile)
630 )
630 )
631
631
632 self._inline = versionflags & FLAG_INLINE_DATA
632 self._inline = versionflags & FLAG_INLINE_DATA
633 self._generaldelta = versionflags & FLAG_GENERALDELTA
633 self._generaldelta = versionflags & FLAG_GENERALDELTA
634
634
635 elif fmt == REVLOGV2:
635 elif fmt == REVLOGV2:
636 if flags & ~REVLOGV2_FLAGS:
636 if flags & ~REVLOGV2_FLAGS:
637 raise error.RevlogError(
637 raise error.RevlogError(
638 _(b'unknown flags (%#04x) in version %d revlog %s')
638 _(b'unknown flags (%#04x) in version %d revlog %s')
639 % (flags >> 16, fmt, self.indexfile)
639 % (flags >> 16, fmt, self.indexfile)
640 )
640 )
641
641
642 # There is a bug in the transaction handling when going from an
642 # There is a bug in the transaction handling when going from an
643 # inline revlog to a separate index and data file. Turn it off until
643 # inline revlog to a separate index and data file. Turn it off until
644 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
644 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
645 # See issue6485
645 # See issue6485
646 self._inline = False
646 self._inline = False
647 # generaldelta implied by version 2 revlogs.
647 # generaldelta implied by version 2 revlogs.
648 self._generaldelta = True
648 self._generaldelta = True
649
649
650 else:
650 else:
651 raise error.RevlogError(
651 raise error.RevlogError(
652 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
652 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
653 )
653 )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif fmt == REVLOGV2:
            self._io = revlogv2io()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif use_rust_index:
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
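    # Minimal usage sketch (illustrative): callers treat _datareadfp() as a
    # context manager over whichever file actually holds revision data:
    #
    #     with self._datareadfp() as df:
    #         df.seek(offset)
    #         data = df.read(length)
    #
    # This is the shape _readsegment() below relies on.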

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self.nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
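    # Worked example (illustrative): the first index field packs a 48-bit
    # data-file offset and 16 bits of flags into one integer, roughly
    # offset_flags = (offset << 16) | flags.  For offset 42 with no flags
    # the stored value is 42 << 16 == 2752512, so start() recovers
    # 2752512 >> 16 == 42 and flags() recovers 2752512 & 0xFFFF == 0.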

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if self.version & 0xFFFF != REVLOGV2:
            return 0
        return self.index[rev][9]
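    # Note (hedged): the version check above also acts as a bounds guard;
    # only REVLOGV2 index entries are expected to carry the extra sidedata
    # fields that make index position 9 meaningful.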

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
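    # Note (hedged): d[5] and d[6] are the parent *revisions*, and i[...][7]
    # maps each back to its node.  A missing parent is stored as nullrev
    # (-1), which the index is expected to resolve to the null entry, so
    # parentless revisions yield nullid here rather than an arbitrary node.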

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
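    # Worked example (illustrative, assuming general delta): if the stored
    # delta bases (index field 3) are {0: 0, 1: 0, 2: 1, 3: 2}, then
    # _deltachain(3) walks 3 -> 2 -> 1 -> 0, stops at 0 because it is its
    # own base, and returns ([0, 1, 2, 3], False); with stoprev=1 it would
    # return ([2, 3], True) instead.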

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
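    # Design note (hedged): the inline lazyset avoids materializing all of
    # ::common up front -- a membership test consults the small addedvalues
    # set first and otherwise defers to the lazy ancestors iterator, which
    # computes ancestors incrementally, on demand.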

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
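    # Worked example (illustrative): on a linear history A-B-C-D,
    # nodesbetween(roots=[B], heads=[D]) returns ([B, C, D], [B], [D]):
    # B, C and D lie on the path, B is the only reachable root, and D the
    # only reachable head.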

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
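    # Note: ishead deliberately has count + 1 slots.  Root revisions store
    # nullrev (-1) as a parent, and a -1 index wraps around to the last
    # element, so clearing the "parents" of a root hits the spare sentinel
    # slot instead of clobbering the flag of the tip-most revision.  The
    # loop never sets that sentinel slot, so enumerate() cannot report it.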

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
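    # Note: the "a > b: return False" shortcut is safe because revlogs are
    # append-only and parents are always written before their children, so
    # an ancestor's revision number is necessarily smaller than that of any
    # of its descendants.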

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
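    # Summary note: _match tries progressively looser interpretations of
    # id -- integer revision, 20-byte binary node, stringified (possibly
    # negative) revision number, then 40-character hex node -- and falls
    # off the end (returning None) when none resolve, leaving prefix
    # resolution to _partialmatch below.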
1481
1481
1482 def _partialmatch(self, id):
1482 def _partialmatch(self, id):
1483 # we don't care wdirfilenodeids as they should be always full hash
1483 # we don't care wdirfilenodeids as they should be always full hash
1484 maybewdir = wdirhex.startswith(id)
1484 maybewdir = wdirhex.startswith(id)
1485 try:
1485 try:
1486 partial = self.index.partialmatch(id)
1486 partial = self.index.partialmatch(id)
1487 if partial and self.hasnode(partial):
1487 if partial and self.hasnode(partial):
1488 if maybewdir:
1488 if maybewdir:
1489 # single 'ff...' match in radix tree, ambiguous with wdir
1489 # single 'ff...' match in radix tree, ambiguous with wdir
1490 raise error.RevlogError
1490 raise error.RevlogError
1491 return partial
1491 return partial
1492 if maybewdir:
1492 if maybewdir:
1493 # no 'ff...' match in radix tree, wdir identified
1493 # no 'ff...' match in radix tree, wdir identified
1494 raise error.WdirUnsupported
1494 raise error.WdirUnsupported
1495 return None
1495 return None
1496 except error.RevlogError:
1496 except error.RevlogError:
1497 # parsers.c radix tree lookup gave multiple matches
1497 # parsers.c radix tree lookup gave multiple matches
1498 # fast path: for unfiltered changelog, radix tree is accurate
1498 # fast path: for unfiltered changelog, radix tree is accurate
1499 if not getattr(self, 'filteredrevs', None):
1499 if not getattr(self, 'filteredrevs', None):
1500 raise error.AmbiguousPrefixLookupError(
1500 raise error.AmbiguousPrefixLookupError(
1501 id, self.indexfile, _(b'ambiguous identifier')
1501 id, self.indexfile, _(b'ambiguous identifier')
1502 )
1502 )
1503 # fall through to slow path that filters hidden revisions
1503 # fall through to slow path that filters hidden revisions
1504 except (AttributeError, ValueError):
1504 except (AttributeError, ValueError):
1505 # we are pure python, or key was too short to search radix tree
1505 # we are pure python, or key was too short to search radix tree
1506 pass
1506 pass
1507
1507
1508 if id in self._pcache:
1508 if id in self._pcache:
1509 return self._pcache[id]
1509 return self._pcache[id]
1510
1510
1511 if len(id) <= 40:
1511 if len(id) <= 40:
1512 try:
1512 try:
1513 # hex(node)[:...]
1513 # hex(node)[:...]
1514 l = len(id) // 2 # grab an even number of digits
1514 l = len(id) // 2 # grab an even number of digits
1515 prefix = bin(id[: l * 2])
1515 prefix = bin(id[: l * 2])
1516 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1516 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1517 nl = [
1517 nl = [
1518 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1518 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1519 ]
1519 ]
1520 if nullhex.startswith(id):
1520 if nullhex.startswith(id):
1521 nl.append(nullid)
1521 nl.append(nullid)
1522 if len(nl) > 0:
1522 if len(nl) > 0:
1523 if len(nl) == 1 and not maybewdir:
1523 if len(nl) == 1 and not maybewdir:
1524 self._pcache[id] = nl[0]
1524 self._pcache[id] = nl[0]
1525 return nl[0]
1525 return nl[0]
1526 raise error.AmbiguousPrefixLookupError(
1526 raise error.AmbiguousPrefixLookupError(
1527 id, self.indexfile, _(b'ambiguous identifier')
1527 id, self.indexfile, _(b'ambiguous identifier')
1528 )
1528 )
1529 if maybewdir:
1529 if maybewdir:
1530 raise error.WdirUnsupported
1530 raise error.WdirUnsupported
1531 return None
1531 return None
1532 except TypeError:
1532 except TypeError:
1533 pass
1533 pass
1534
1534
1535 def lookup(self, id):
1535 def lookup(self, id):
1536 """locate a node based on:
1536 """locate a node based on:
1537 - revision number or str(revision number)
1537 - revision number or str(revision number)
1538 - nodeid or subset of hex nodeid
1538 - nodeid or subset of hex nodeid
1539 """
1539 """
1540 n = self._match(id)
1540 n = self._match(id)
1541 if n is not None:
1541 if n is not None:
1542 return n
1542 return n
1543 n = self._partialmatch(id)
1543 n = self._partialmatch(id)
1544 if n:
1544 if n:
1545 return n
1545 return n
1546
1546
1547 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1547 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1548
1548
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

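# A standalone sketch (not part of the revlog API) of the prefix loops in
# shortest() above, assuming a plain collection of hex node ids instead of
# a revlog index; the helper name is hypothetical.
def shortest_unique_prefix(hexnode, all_hexnodes, minlength=1):
    """Return the shortest prefix of hexnode unique among all_hexnodes."""
    others = [h for h in all_hexnodes if h != hexnode]
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        # a prefix stays ambiguous while any other node shares it
        if not any(h.startswith(prefix) for h in others):
            return prefix
    return hexnode
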
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

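# For reference, storageutil.hashrevisionsha1() computes the classic
# Mercurial nodeid: SHA-1 over the two (sorted) parent nodes followed by
# the revision text, which is what cmp() above compares against. A
# simplified standalone sketch, assuming 20-byte binary parent ids:
import hashlib

def sketch_hashrevisionsha1(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()
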
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

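# The window arithmetic above rounds the requested byte range outward to
# multiples of the (power-of-two) chunk cache size, so neighbouring reads
# hit the cache. A standalone sketch of the same computation; the 65536
# default is an assumption, the real value comes from self._chunkcachesize:
def aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    return realoffset, reallength

# e.g. aligned_window(70000, 100) -> (65536, 65536): one cache-aligned
# window that fully contains the requested [70000, 70100) range.
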
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

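# In an inline revlog, revision data lives in the .i file interleaved with
# the index entries, which is why the code above shifts both ends of the
# range by (rev + 1) * self._io.size. A toy illustration with an assumed
# 64-byte entry size:
def inline_physical_offset(logical_start, rev, entry_size=64):
    # rev + 1 entries (for revisions 0..rev) precede this revision's data
    return logical_start + (rev + 1) * entry_size
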
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

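# The base field of an index entry means different things depending on the
# revlog flavor: with generaldelta it names the delta parent directly,
# while legacy revlogs always delta against the previous revision. A
# table-driven sketch, assuming `base_of` maps rev -> stored base field:
def sketch_deltaparent(rev, base_of, generaldelta, nullrev=-1):
    base = base_of[rev]
    if base == rev:
        return nullrev  # stored as a full snapshot, no delta parent
    elif generaldelta:
        return base
    else:
        return rev - 1
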
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

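# With sparse-revlog a revision can be an *intermediate* snapshot: a delta
# whose base is another snapshot rather than one of its parents. A toy
# restatement of the recursion above, assuming dicts mapping each rev to
# its stored base and to its pair of parent revs:
def sketch_issnapshot(rev, base_of, parents_of, nullrev=-1):
    if rev == nullrev:
        return True
    base = base_of[rev]
    if base == rev or base == nullrev:
        return True  # classic full snapshot
    if base in parents_of[rev]:
        return False  # ordinary delta against a parent
    # a delta against a non-parent is a snapshot only if its base is one
    return sketch_issnapshot(base, base_of, parents_of, nullrev)
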
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self._io.size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

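# The inline-to-split decision above is a plain size check against the tip
# revision's end offset. A minimal standalone sketch; the 131072 threshold
# is an assumption standing in for the module-level _maxinline constant:
def should_split_inline(inline, tip_start, tip_length, maxinline=131072):
    return inline and (tip_start + tip_length) >= maxinline
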
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

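# The one-byte headers that decompress() below dispatches on fall out of
# compress(): zlib output is self-identifying because it starts with b'x',
# while a chunk stored raw either begins with b'\0' or is prefixed with the
# explicit b'u' marker. A standalone sketch of the legacy zlib convention,
# not the pluggable-engine path used above:
import zlib

def sketch_compress(data):
    if not data:
        return data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed  # begins with b'x', the zlib header byte
    if data[0:1] == b'\0':
        return data  # NUL-led data can be stored raw unambiguously
    return b'u' + data  # explicit "uncompressed" marker
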
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self._io.size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self._io.size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

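# Under REVLOGV2 the append offset cannot be derived from the last entry
# alone, because earlier revisions' sidedata may have been rewritten past
# it. A toy version of the scan above, assuming each entry is a tuple of
# (data_end, sidedata_offset, sidedata_size):
def sketch_data_offset(entries):
    offset = 0
    for data_end, sd_offset, sd_size in entries:
        offset = max(offset, data_end, sd_offset + sd_size)
    return offset
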
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

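# A distilled sketch of the a+ mode workaround used above: reposition to
# EOF explicitly before every write, since on some platforms an a+ handle
# that was seeked for reading does not reliably append afterwards.
import os

def append_exactly(fh, payload):
    fh.seek(0, os.SEEK_END)  # explicit, even though 'a+' implies append
    fh.write(payload)
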
2486 def addgroup(
2486 def addgroup(
2487 self,
2487 self,
2488 deltas,
2488 deltas,
2489 linkmapper,
2489 linkmapper,
2490 transaction,
2490 transaction,
2491 alwayscache=False,
2491 alwayscache=False,
2492 addrevisioncb=None,
2492 addrevisioncb=None,
2493 duplicaterevisioncb=None,
2493 duplicaterevisioncb=None,
2494 ):
2494 ):
2495 """
2495 """
2496 add a delta group
2496 add a delta group
2497
2497
2498 given a set of deltas, add them to the revision log. the
2498 given a set of deltas, add them to the revision log. the
2499 first delta is against its parent, which should be in our
2499 first delta is against its parent, which should be in our
2500 log, the rest are against the previous delta.
2500 log, the rest are against the previous delta.
2501
2501
2502 If ``addrevisioncb`` is defined, it will be called with arguments of
2502 If ``addrevisioncb`` is defined, it will be called with arguments of
2503 this revlog and the node that was added.
2503 this revlog and the node that was added.
2504 """
2504 """
2505
2506 if self._writinghandles:
2507 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2508
2509 r = len(self)
2510 end = 0
2511 if r:
2512 end = self.end(r - 1)
2513 ifh = self._indexfp(b"a+")
2514 isize = r * self._io.size
2515 if self._inline:
2516 transaction.add(self.indexfile, end + isize)
2517 dfh = None
2518 else:
2519 transaction.add(self.indexfile, isize)
2520 transaction.add(self.datafile, end)
2521 dfh = self._datafp(b"a+")
2522
2523 def flush():
2524 if dfh:
2525 dfh.flush()
2526 ifh.flush()
2527
2528 self._writinghandles = (ifh, dfh)
2529 empty = True
2530
2531 try:
2532 deltacomputer = deltautil.deltacomputer(self)
2533 # loop through our set of deltas
2534 for data in deltas:
2535 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2536 link = linkmapper(linknode)
2537 flags = flags or REVIDX_DEFAULT_FLAGS
2538
2539 rev = self.index.get_rev(node)
2540 if rev is not None:
2541 # this can happen if two branches make the same change
2542 self._nodeduplicatecallback(transaction, rev)
2543 if duplicaterevisioncb:
2544 duplicaterevisioncb(self, rev)
2545 empty = False
2546 continue
2547
2548 for p in (p1, p2):
2549 if not self.index.has_node(p):
2550 raise error.LookupError(
2551 p, self.indexfile, _(b'unknown parent')
2552 )
2553
2554 if not self.index.has_node(deltabase):
2555 raise error.LookupError(
2556 deltabase, self.indexfile, _(b'unknown delta base')
2557 )
2558
2559 baserev = self.rev(deltabase)
2560
2561 if baserev != nullrev and self.iscensored(baserev):
2562 # if base is censored, delta must be full replacement in a
2563 # single patch operation
2564 hlen = struct.calcsize(b">lll")
2565 oldlen = self.rawsize(baserev)
2566 newlen = len(delta) - hlen
2567 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2568 raise error.CensoredBaseError(
2569 self.indexfile, self.node(baserev)
2570 )
2571
2572 if not flags and self._peek_iscensored(baserev, delta, flush):
2573 flags |= REVIDX_ISCENSORED
2574
2575 # We assume consumers of addrevisioncb will want to retrieve
2576 # the added revision, which will require a call to
2577 # revision(). revision() will fast path if there is a cache
2578 # hit. So, we tell _addrevision() to always cache in this case.
2579 # We're only using addgroup() in the context of changegroup
2580 # generation so the revision data can always be handled as raw
2581 # by the flagprocessor.
2582 rev = self._addrevision(
2583 node,
2584 None,
2585 transaction,
2586 link,
2587 p1,
2588 p2,
2589 flags,
2590 (baserev, delta),
2591 ifh,
2592 dfh,
2593 alwayscache=alwayscache,
2594 deltacomputer=deltacomputer,
2595 sidedata=sidedata,
2596 )
2597
2598 if addrevisioncb:
2599 addrevisioncb(self, rev)
2600 empty = False
2601
2602 if not dfh and not self._inline:
2603 # addrevision switched from inline to conventional
2604 # reopen the index
2605 ifh.close()
2606 dfh = self._datafp(b"a+")
2607 ifh = self._indexfp(b"a+")
2608 self._writinghandles = (ifh, dfh)
2609 finally:
2610 self._writinghandles = None
2611
2612 if dfh:
2613 dfh.close()
2614 ifh.close()
2615 return not empty
2616
2617 def iscensored(self, rev):
2618 """Check if a file revision is censored."""
2619 if not self._censorable:
2620 return False
2621
2622 return self.flags(rev) & REVIDX_ISCENSORED
2623
2624 def _peek_iscensored(self, baserev, delta, flush):
2625 """Quickly check if a delta produces a censored revision."""
2626 if not self._censorable:
2627 return False
2628
2629 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2630
2631 def getstrippoint(self, minlink):
2632 """find the minimum rev that must be stripped to strip the linkrev
2633
2634 Returns a tuple containing the minimum rev and a set of all revs that
2635 have linkrevs that will be broken by this strip.
2636 """
2637 return storageutil.resolvestripinfo(
2638 minlink,
2639 len(self) - 1,
2640 self.headrevs(),
2641 self.linkrev,
2642 self.parentrevs,
2643 )
2644
2645 def strip(self, minlink, transaction):
2646 """truncate the revlog on the first revision with a linkrev >= minlink
2647
2648 This function is called when we're stripping revision minlink and
2649 its descendants from the repository.
2650
2651 We have to remove all revisions with linkrev >= minlink, because
2652 the equivalent changelog revisions will be renumbered after the
2653 strip.
2654
2655 So we truncate the revlog on the first of these revisions, and
2656 trust that the caller has saved the revisions that shouldn't be
2657 removed and that it'll re-add them after this truncation.
2658 """
2659 if len(self) == 0:
2660 return
2661
2662 rev, _ = self.getstrippoint(minlink)
2663 if rev == len(self):
2664 return
2665
2666 # first truncate the files on disk
2667 end = self.start(rev)
2668 if not self._inline:
2669 transaction.add(self.datafile, end)
2670 end = rev * self._io.size
2671 else:
2672 end += rev * self._io.size
2673
2674 transaction.add(self.indexfile, end)
2675
2676 # then reset internal state in memory to forget those revisions
2677 self._revisioncache = None
2678 self._chaininfocache = util.lrucachedict(500)
2679 self._chunkclear()
2680
2681 del self.index[rev:-1]
2682
2683 def checksize(self):
2684 """Check size of index and data files
2685
2686 return a (dd, di) tuple.
2687 - dd: extra bytes for the "data" file
2688 - di: extra bytes for the "index" file
2689
2690 A healthy revlog will return (0, 0).
2691 """
2692 expected = 0
2693 if len(self):
2694 expected = max(0, self.end(len(self) - 1))
2695
2696 try:
2697 with self._datafp() as f:
2698 f.seek(0, io.SEEK_END)
2699 actual = f.tell()
2700 dd = actual - expected
2701 except IOError as inst:
2702 if inst.errno != errno.ENOENT:
2703 raise
2704 dd = 0
2705
2706 try:
2707 f = self.opener(self.indexfile)
2708 f.seek(0, io.SEEK_END)
2709 actual = f.tell()
2710 f.close()
2711 s = self._io.size
2712 i = max(0, actual // s)
2713 di = actual - (i * s)
2714 if self._inline:
2715 databytes = 0
2716 for r in self:
2717 databytes += max(0, self.length(r))
2718 dd = 0
2719 di = actual - len(self) * s - databytes
2720 except IOError as inst:
2721 if inst.errno != errno.ENOENT:
2722 raise
2723 di = 0
2724
2725 return (dd, di)
2726
2727 def files(self):
2728 res = [self.indexfile]
2729 if not self._inline:
2730 res.append(self.datafile)
2731 return res
2732
2733 def emitrevisions(
2734 self,
2735 nodes,
2736 nodesorder=None,
2737 revisiondata=False,
2738 assumehaveparentrevisions=False,
2739 deltamode=repository.CG_DELTAMODE_STD,
2740 sidedata_helpers=None,
2741 ):
2742 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2743 raise error.ProgrammingError(
2744 b'unhandled value for nodesorder: %s' % nodesorder
2745 )
2746
2747 if nodesorder is None and not self._generaldelta:
2748 nodesorder = b'storage'
2749
2750 if (
2751 not self._storedeltachains
2752 and deltamode != repository.CG_DELTAMODE_PREV
2753 ):
2754 deltamode = repository.CG_DELTAMODE_FULL
2755
2756 return storageutil.emitrevisions(
2757 self,
2758 nodes,
2759 nodesorder,
2760 revlogrevisiondelta,
2761 deltaparentfn=self.deltaparent,
2762 candeltafn=self.candelta,
2763 rawsizefn=self.rawsize,
2764 revdifffn=self.revdiff,
2765 flagsfn=self.flags,
2766 deltamode=deltamode,
2767 revisiondata=revisiondata,
2768 assumehaveparentrevisions=assumehaveparentrevisions,
2769 sidedata_helpers=sidedata_helpers,
2770 )
2771
2772 DELTAREUSEALWAYS = b'always'
2773 DELTAREUSESAMEREVS = b'samerevs'
2774 DELTAREUSENEVER = b'never'
2775
2776 DELTAREUSEFULLADD = b'fulladd'
2777
2778 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2779
2780 def clone(
2781 self,
2782 tr,
2783 destrevlog,
2784 addrevisioncb=None,
2785 deltareuse=DELTAREUSESAMEREVS,
2786 forcedeltabothparents=None,
2787 sidedatacompanion=None,
2788 ):
2789 """Copy this revlog to another, possibly with format changes.
2790
2791 The destination revlog will contain the same revisions and nodes.
2792 However, it may not be bit-for-bit identical due to e.g. delta encoding
2793 differences.
2794
2795 The ``deltareuse`` argument controls how deltas from the existing revlog
2796 are preserved in the destination revlog. The argument can have the
2797 following values:
2798
2799 DELTAREUSEALWAYS
2800 Deltas will always be reused (if possible), even if the destination
2801 revlog would not select the same revisions for the delta. This is the
2802 fastest mode of operation.
2803 DELTAREUSESAMEREVS
2804 Deltas will be reused if the destination revlog would pick the same
2805 revisions for the delta. This mode strikes a balance between speed
2806 and optimization.
2807 DELTAREUSENEVER
2808 Deltas will never be reused. This is the slowest mode of execution.
2809 This mode can be used to recompute deltas (e.g. if the diff/delta
2810 algorithm changes).
2811 DELTAREUSEFULLADD
2812 Revisions will be re-added as if they were new content. This is
2813 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2814 e.g. large file detection and handling.
2815
2816 Delta computation can be slow, so the choice of delta reuse policy can
2817 significantly affect run time.
2818
2819 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2820 two extremes. Deltas will be reused if they are appropriate. But if the
2821 delta could choose a better revision, it will do so. This means if you
2822 are converting a non-generaldelta revlog to a generaldelta revlog,
2823 deltas will be recomputed if the delta's parent isn't a parent of the
2824 revision.
2825
2826 In addition to the delta policy, the ``forcedeltabothparents``
2827 argument controls whether to force compute deltas against both parents
2828 for merges. If None (the default), the destination revlog's current setting is kept.
2829
2830 If not None, `sidedatacompanion` is a callable that accepts two
2831 arguments:
2832
2833 (srcrevlog, rev)
2834
2835 and returns a quintet that controls changes to sidedata content from the
2836 old revision to the new clone result:
2837
2838 (dropall, filterout, update, new_flags, dropped_flags)
2839
2840 * if `dropall` is True, all sidedata should be dropped
2841 * `filterout` is a set of sidedata keys that should be dropped
2842 * `update` is a mapping of additional/new key -> value
2843 * new_flags is a bitfield of new flags that the revision should get
2844 * dropped_flags is a bitfield of flags that the revision should no longer have
2845 """
2846 if deltareuse not in self.DELTAREUSEALL:
2847 raise ValueError(
2848 _(b'value for deltareuse invalid: %s') % deltareuse
2849 )
2850
2851 if len(destrevlog):
2852 raise ValueError(_(b'destination revlog is not empty'))
2853
2854 if getattr(self, 'filteredrevs', None):
2855 raise ValueError(_(b'source revlog has filtered revisions'))
2856 if getattr(destrevlog, 'filteredrevs', None):
2857 raise ValueError(_(b'destination revlog has filtered revisions'))
2858
2859 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2860 # if possible.
2861 oldlazydelta = destrevlog._lazydelta
2862 oldlazydeltabase = destrevlog._lazydeltabase
2863 oldamd = destrevlog._deltabothparents
2864
2865 try:
2866 if deltareuse == self.DELTAREUSEALWAYS:
2867 destrevlog._lazydeltabase = True
2868 destrevlog._lazydelta = True
2869 elif deltareuse == self.DELTAREUSESAMEREVS:
2870 destrevlog._lazydeltabase = False
2871 destrevlog._lazydelta = True
2872 elif deltareuse == self.DELTAREUSENEVER:
2873 destrevlog._lazydeltabase = False
2874 destrevlog._lazydelta = False
2875
2876 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2877
2878 self._clone(
2879 tr,
2880 destrevlog,
2881 addrevisioncb,
2882 deltareuse,
2883 forcedeltabothparents,
2884 sidedatacompanion,
2885 )
2886
2887 finally:
2888 destrevlog._lazydelta = oldlazydelta
2889 destrevlog._lazydeltabase = oldlazydeltabase
2890 destrevlog._deltabothparents = oldamd
2891
2892 def _clone(
2893 self,
2894 tr,
2895 destrevlog,
2896 addrevisioncb,
2897 deltareuse,
2898 forcedeltabothparents,
2899 sidedatacompanion,
2900 ):
2901 """perform the core duty of `revlog.clone` after parameter processing"""
2902 deltacomputer = deltautil.deltacomputer(destrevlog)
2903 index = self.index
2904 for rev in self:
2905 entry = index[rev]
2906
2907 # Some classes override linkrev to take filtered revs into
2908 # account. Use raw entry from index.
2909 flags = entry[0] & 0xFFFF
2910 linkrev = entry[4]
2911 p1 = index[entry[5]][7]
2912 p2 = index[entry[6]][7]
2913 node = entry[7]
2914
2915 sidedataactions = (False, [], {}, 0, 0)
2916 if sidedatacompanion is not None:
2917 sidedataactions = sidedatacompanion(self, rev)
2918
2919 # (Possibly) reuse the delta from the revlog if allowed and
2920 # the revlog chunk is a delta.
2921 cachedelta = None
2922 rawtext = None
2923 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2924 dropall = sidedataactions[0]
2925 filterout = sidedataactions[1]
2926 update = sidedataactions[2]
2927 new_flags = sidedataactions[3]
2928 dropped_flags = sidedataactions[4]
2929 text, sidedata = self._revisiondata(rev)
2930 if dropall:
2931 sidedata = {}
2932 for key in filterout:
2933 sidedata.pop(key, None)
2934 sidedata.update(update)
2935 if not sidedata:
2936 sidedata = None
2937
2938 flags |= new_flags
2939 flags &= ~dropped_flags
2940
2941 destrevlog.addrevision(
2942 text,
2943 tr,
2944 linkrev,
2945 p1,
2946 p2,
2947 cachedelta=cachedelta,
2948 node=node,
2949 flags=flags,
2950 deltacomputer=deltacomputer,
2951 sidedata=sidedata,
2952 )
2953 else:
2954 if destrevlog._lazydelta:
2955 dp = self.deltaparent(rev)
2956 if dp != nullrev:
2957 cachedelta = (dp, bytes(self._chunk(rev)))
2958
2959 if not cachedelta:
2960 rawtext = self.rawdata(rev)
2961
2962 ifh = destrevlog.opener(
2963 destrevlog.indexfile, b'a+', checkambig=False
2964 )
2965 dfh = None
2966 if not destrevlog._inline:
2967 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2968 try:
2969 destrevlog._addrevision(
2970 node,
2971 rawtext,
2972 tr,
2973 linkrev,
2974 p1,
2975 p2,
2976 flags,
2977 cachedelta,
2978 ifh,
2979 dfh,
2980 deltacomputer=deltacomputer,
2981 )
2982 finally:
2983 if dfh:
2984 dfh.close()
2985 ifh.close()
2986
2987 if addrevisioncb:
2988 addrevisioncb(self, rev, node)
2989
2990 def censorrevision(self, tr, censornode, tombstone=b''):
2991 if (self.version & 0xFFFF) == REVLOGV0:
2992 raise error.RevlogError(
2993 _(b'cannot censor with version %d revlogs') % self.version
2994 )
2995
2996 censorrev = self.rev(censornode)
2997 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2998
2999 if len(tombstone) > self.rawsize(censorrev):
3000 raise error.Abort(
3001 _(b'censor tombstone must be no longer than censored data')
3002 )
3003
3004 # Rewriting the revlog in place is hard. Our strategy for censoring is
3005 # to create a new revlog, copy all revisions to it, then replace the
3006 # revlogs on transaction close.
3007
3008 newindexfile = self.indexfile + b'.tmpcensored'
3009 newdatafile = self.datafile + b'.tmpcensored'
3010
3011 # This is a bit dangerous. We could easily have a mismatch of state.
3012 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3013 newrl.version = self.version
3014 newrl._generaldelta = self._generaldelta
3015 newrl._io = self._io
3016
3017 for rev in self.revs():
3018 node = self.node(rev)
3019 p1, p2 = self.parents(node)
3020
3021 if rev == censorrev:
3022 newrl.addrawrevision(
3023 tombstone,
3024 tr,
3025 self.linkrev(censorrev),
3026 p1,
3027 p2,
3028 censornode,
3029 REVIDX_ISCENSORED,
3030 )
3031
3032 if newrl.deltaparent(rev) != nullrev:
3033 raise error.Abort(
3034 _(
3035 b'censored revision stored as delta; '
3036 b'cannot censor'
3037 ),
3038 hint=_(
3039 b'censoring of revlogs is not '
3040 b'fully implemented; please report '
3041 b'this bug'
3042 ),
3043 )
3044 continue
3045
3046 if self.iscensored(rev):
3047 if self.deltaparent(rev) != nullrev:
3048 raise error.Abort(
3049 _(
3050 b'cannot censor due to censored '
3051 b'revision having delta stored'
3052 )
3053 )
3054 rawtext = self._chunk(rev)
3055 else:
3056 rawtext = self.rawdata(rev)
3057
3058 newrl.addrawrevision(
3059 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3060 )
3061
3062 tr.addbackup(self.indexfile, location=b'store')
3063 if not self._inline:
3064 tr.addbackup(self.datafile, location=b'store')
3065
3066 self.opener.rename(newrl.indexfile, self.indexfile)
3067 if not self._inline:
3068 self.opener.rename(newrl.datafile, self.datafile)
3069
3070 self.clearcaches()
3071 self._loadindex()
3072
3073 def verifyintegrity(self, state):
3074 """Verifies the integrity of the revlog.
3075
3076 Yields ``revlogproblem`` instances describing problems that are
3077 found.
3078 """
3079 dd, di = self.checksize()
3080 if dd:
3081 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3082 if di:
3083 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3084
3085 version = self.version & 0xFFFF
3086
3087 # The verifier tells us what version revlog we should be.
3088 if version != state[b'expectedversion']:
3089 yield revlogproblem(
3090 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3091 % (self.indexfile, version, state[b'expectedversion'])
3092 )
3093
3094 state[b'skipread'] = set()
3095 state[b'safe_renamed'] = set()
3096
3097 for rev in self:
3098 node = self.node(rev)
3099
3100 # Verify contents. 4 cases to care about:
3101 #
3102 # common: the most common case
3103 # rename: with a rename
3104 # meta: file content starts with b'\1\n', the metadata
3105 # header defined in filelog.py, but without a rename
3106 # ext: content stored externally
3107 #
3108 # More formally, their differences are shown below:
3109 #
3110 # | common | rename | meta | ext
3111 # -------------------------------------------------------
3112 # flags() | 0 | 0 | 0 | not 0
3113 # renamed() | False | True | False | ?
3114 # rawtext[0:2]=='\1\n'| False | True | True | ?
3115 #
3116 # "rawtext" means the raw text stored in revlog data, which
3117 # could be retrieved by "rawdata(rev)". "text"
3118 # mentioned below is "revision(rev)".
3119 #
3120 # There are 3 different lengths stored physically:
3121 # 1. L1: rawsize, stored in revlog index
3122 # 2. L2: len(rawtext), stored in revlog data
3123 # 3. L3: len(text), stored in revlog data if flags==0, or
3124 # possibly somewhere else if flags!=0
3125 #
3126 # L1 should be equal to L2. L3 could be different from them.
3127 # "text" may or may not affect commit hash depending on flag
3128 # processors (see flagutil.addflagprocessor).
3129 #
3130 # | common | rename | meta | ext
3131 # -------------------------------------------------
3132 # rawsize() | L1 | L1 | L1 | L1
3133 # size() | L1 | L2-LM | L1(*) | L1 (?)
3134 # len(rawtext) | L2 | L2 | L2 | L2
3135 # len(text) | L2 | L2 | L2 | L3
3136 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3137 #
3138 # LM: length of metadata, depending on rawtext
3139 # (*): not ideal, see comment in filelog.size
3140 # (?): could be "- len(meta)" if the resolved content has
3141 # rename metadata
3142 #
3143 # Checks needed to be done:
3144 # 1. length check: L1 == L2, in all cases.
3145 # 2. hash check: depending on flag processor, we may need to
3146 # use either "text" (external), or "rawtext" (in revlog).
3147
3148 try:
3149 skipflags = state.get(b'skipflags', 0)
3150 if skipflags:
3151 skipflags &= self.flags(rev)
3152
3153 _verify_revision(self, skipflags, state, node)
3154
3155 l1 = self.rawsize(rev)
3156 l2 = len(self.rawdata(node))
3157
3158 if l1 != l2:
3159 yield revlogproblem(
3160 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3161 node=node,
3162 )
3163
3164 except error.CensoredNodeError:
3165 if state[b'erroroncensored']:
3166 yield revlogproblem(
3167 error=_(b'censored file data'), node=node
3168 )
3169 state[b'skipread'].add(node)
3170 except Exception as e:
3171 yield revlogproblem(
3172 error=_(b'unpacking %s: %s')
3173 % (short(node), stringutil.forcebytestr(e)),
3174 node=node,
3175 )
3176 state[b'skipread'].add(node)
3177
3178 def storageinfo(
3179 self,
3180 exclusivefiles=False,
3181 sharedfiles=False,
3182 revisionscount=False,
3183 trackedsize=False,
3184 storedsize=False,
3185 ):
3186 d = {}
3187
3188 if exclusivefiles:
3189 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3190 if not self._inline:
3191 d[b'exclusivefiles'].append((self.opener, self.datafile))
3192
3193 if sharedfiles:
3194 d[b'sharedfiles'] = []
3195
3196 if revisionscount:
3197 d[b'revisionscount'] = len(self)
3198
3199 if trackedsize:
3200 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3201
3202 if storedsize:
3203 d[b'storedsize'] = sum(
3204 self.opener.stat(path).st_size for path in self.files()
3205 )
3206
3207 return d
3208
3209 def rewrite_sidedata(self, helpers, startrev, endrev):
3210 if self.version & 0xFFFF != REVLOGV2:
3211 return
3212 # inline revlogs are not yet supported because they suffer from an issue when
3213 # rewriting them (since it's not an append-only operation).
3214 # See issue6485.
3215 assert not self._inline
3216 if not helpers[1] and not helpers[2]:
3217 # Nothing to generate or remove
3218 return
3219
3220 new_entries = []
3221 # append the new sidedata
3222 with self._datafp(b'a+') as fp:
3223 # Maybe this bug still exists, see revlog._writeentry
3224 fp.seek(0, os.SEEK_END)
3225 current_offset = fp.tell()
3226 for rev in range(startrev, endrev + 1):
3227 entry = self.index[rev]
3228 new_sidedata = storageutil.run_sidedata_helpers(
3229 store=self,
3230 sidedata_helpers=helpers,
3231 sidedata={},
3232 rev=rev,
3233 )
3234
3235 serialized_sidedata = sidedatautil.serialize_sidedata(
3236 new_sidedata
3237 )
3238 if entry[8] != 0 or entry[9] != 0:
3239 # rewriting entries that already have sidedata is not
3240 # supported yet, because it introduces garbage data in the
3241 # revlog.
3242 msg = b"Rewriting existing sidedata is not supported yet"
3243 raise error.Abort(msg)
3244 entry = entry[:8]
3245 entry += (current_offset, len(serialized_sidedata))
3246
3247 fp.write(serialized_sidedata)
3248 new_entries.append(entry)
3249 current_offset += len(serialized_sidedata)
3250
3251 # rewrite the new index entries
3252 with self._indexfp(b'w+') as fp:
3253 fp.seek(startrev * self._io.size)
3254 for i, entry in enumerate(new_entries):
3255 rev = startrev + i
3256 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3257 packed = self._io.packentry(entry, self.node, self.version, rev)
3258 fp.write(packed)
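# A minimal sketch (not part of the original source) of how this method
# is meant to be driven. The exact composition of ``helpers`` is an
# assumption here: a 3-tuple whose second and third items hold the
# sidedata computers (categories to generate) and the categories to
# remove, which is why the early return above checks helpers[1] and
# helpers[2]:
#
#     helpers = (repo, sidedata_computers, sidedata_removers)
#     rl.rewrite_sidedata(helpers, startrev, endrev)
#
# Afterwards, index entries 8 and 9 (the sidedata offset and length) of
# the rewritten revisions point at the sidedata blocks appended to the
# data file.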
@@ -1,499 +1,497 b''
1 #testcases extra sidedata
2
3 #if extra
4 $ cat >> $HGRCPATH << EOF
5 > [experimental]
6 > copies.write-to=changeset-only
7 > copies.read-from=changeset-only
8 > [alias]
9 > changesetcopies = log -r . -T 'files: {files}
10 > {extras % "{ifcontains("files", key, "{key}: {value}\n")}"}
11 > {extras % "{ifcontains("copies", key, "{key}: {value}\n")}"}'
12 > EOF
13 #endif
14
15 #if sidedata
16 $ cat >> $HGRCPATH << EOF
17 > [format]
18 > exp-use-copies-side-data-changeset = yes
19 > EOF
20 #endif
21
22 $ cat >> $HGRCPATH << EOF
23 > [alias]
24 > showcopies = log -r . -T '{file_copies % "{source} -> {name}\n"}'
25 > [extensions]
26 > rebase =
27 > split =
28 > EOF
29
30 Check that copies are recorded correctly
31
32 $ hg init repo
33 $ cd repo
34 #if sidedata
35 $ hg debugformat -v
36 format-variant repo config default
37 fncache: yes yes yes
38 dotencode: yes yes yes
39 generaldelta: yes yes yes
40 share-safe: no no no
41 sparserevlog: yes yes yes
42 persistent-nodemap: no no no
43 copies-sdc: yes yes no
44 revlog-v2: yes yes no
45 plain-cl-delta: yes yes yes
46 compression: zlib zlib zlib
47 compression-level: default default default
48 #else
49 $ hg debugformat -v
50 format-variant repo config default
51 fncache: yes yes yes
52 dotencode: yes yes yes
53 generaldelta: yes yes yes
54 share-safe: no no no
55 sparserevlog: yes yes yes
56 persistent-nodemap: no no no
57 copies-sdc: no no no
58 revlog-v2: no no no
59 plain-cl-delta: yes yes yes
60 compression: zlib zlib zlib
61 compression-level: default default default
62 #endif
63 $ echo a > a
64 $ hg add a
65 $ hg ci -m initial
66 $ hg cp a b
67 $ hg cp a c
68 $ hg cp a d
69 $ hg ci -m 'copy a to b, c, and d'
70
71 #if extra
72
73 $ hg changesetcopies
74 files: b c d
75 filesadded: 0
76 1
77 2
78
79 p1copies: 0\x00a (esc)
80 1\x00a (esc)
81 2\x00a (esc)
82 #else
83 $ hg debugsidedata -c -v -- -1
84 1 sidedata entries
85 entry-0014 size 44
86 '\x00\x00\x00\x04\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00abcd'
87 #endif
88
89 $ hg showcopies
90 a -> b
91 a -> c
92 a -> d
93
94 #if extra
95
96 $ hg showcopies --config experimental.copies.read-from=compatibility
97 a -> b
98 a -> c
99 a -> d
100 $ hg showcopies --config experimental.copies.read-from=filelog-only
101
102 #endif
103
104 Check that renames are recorded correctly
105
106 $ hg mv b b2
107 $ hg ci -m 'rename b to b2'
108
109 #if extra
110
111 $ hg changesetcopies
112 files: b b2
113 filesadded: 1
114 filesremoved: 0
115
116 p1copies: 1\x00b (esc)
117
118 #else
119 $ hg debugsidedata -c -v -- -1
120 1 sidedata entries
121 entry-0014 size 25
122 '\x00\x00\x00\x02\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00bb2'
123 #endif
124
125 $ hg showcopies
126 b -> b2
127
128
129 Rename onto existing file. This should get recorded in the changeset files list and in the extras,
130 even though there is no filelog entry.
131
132 $ hg cp b2 c --force
133 $ hg st --copies
134 M c
135 b2
136
137 #if extra
138
139 $ hg debugindex c
140 rev linkrev nodeid p1 p2
141 0 1 b789fdd96dc2 000000000000 000000000000
142
143 #else
144
145 $ hg debugindex c
146 rev linkrev nodeid p1 p2
147 0 1 37d9b5d994ea 000000000000 000000000000
148
149 #endif
150
151
152 $ hg ci -m 'move b onto d'
153
154 #if extra
155
156 $ hg changesetcopies
157 files: c
158
159 p1copies: 0\x00b2 (esc)
160
161 #else
162 $ hg debugsidedata -c -v -- -1
163 1 sidedata entries
164 entry-0014 size 25
165 '\x00\x00\x00\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x00b2c'
166 #endif
167
168 $ hg showcopies
169 b2 -> c
170
171 #if extra
172
173 $ hg debugindex c
174 rev linkrev nodeid p1 p2
175 0 1 b789fdd96dc2 000000000000 000000000000
176
177 #else
178
179 $ hg debugindex c
180 rev linkrev nodeid p1 p2
181 0 1 37d9b5d994ea 000000000000 000000000000
182 1 3 029625640347 000000000000 000000000000
183
184 #endif
185
186 Create a merge commit with copying done during merge.
187
188 $ hg co 0
189 0 files updated, 0 files merged, 3 files removed, 0 files unresolved
190 $ hg cp a e
191 $ hg cp a f
192 $ hg ci -m 'copy a to e and f'
193 created new head
194 $ hg merge 3
195 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
196 (branch merge, don't forget to commit)
197 File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
198 always record it as being from p1
199 $ hg cp a g
200 File 'd' exists only in p2, so 'h' should be from p2
201 $ hg cp d h
202 File 'f' exists only in p1, so 'i' should be from p1
203 $ hg cp f i
204 $ hg ci -m 'merge'
205
206 #if extra
207
208 $ hg changesetcopies
209 files: g h i
210 filesadded: 0
211 1
212 2
213
214 p1copies: 0\x00a (esc)
215 2\x00f (esc)
216 p2copies: 1\x00d (esc)
217
218 #else
219 $ hg debugsidedata -c -v -- -1
220 1 sidedata entries
221 entry-0014 size 64
222 '\x00\x00\x00\x06\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00\x07\x00\x00\x00\x05\x00\x00\x00\x01\x06\x00\x00\x00\x06\x00\x00\x00\x02adfghi'
223 #endif
224
225 $ hg showcopies
226 a -> g
227 d -> h
228 f -> i
229
230 Test writing to both changeset and filelog
231
232 $ hg cp a j
233 #if extra
234 $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
235 $ hg changesetcopies
236 files: j
237 filesadded: 0
238 filesremoved:
239
240 p1copies: 0\x00a (esc)
241 p2copies:
242 #else
243 $ hg ci -m 'copy a to j'
244 $ hg debugsidedata -c -v -- -1
245 1 sidedata entries
246 entry-0014 size 24
247 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
248 #endif
249 $ hg debugdata j 0
250 \x01 (esc)
251 copy: a
252 copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
253 \x01 (esc)
254 a
255 $ hg showcopies
256 a -> j
257 $ hg showcopies --config experimental.copies.read-from=compatibility
258 a -> j
259 $ hg showcopies --config experimental.copies.read-from=filelog-only
260 a -> j
261 Existing copy information in the changeset gets removed on amend and writing
262 copy information on to the filelog
263 #if extra
264 $ hg ci --amend -m 'copy a to j, v2' \
265 > --config experimental.copies.write-to=filelog-only
266 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
267 $ hg changesetcopies
268 files: j
269
270 #else
271 $ hg ci --amend -m 'copy a to j, v2'
272 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
273 $ hg debugsidedata -c -v -- -1
274 1 sidedata entries (missing-correct-output !)
274 1 sidedata entries
275 entry-0014 size 24 (missing-correct-output !)
275 entry-0014 size 24
276 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj' (missing-correct-output !)
276 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
277 #endif
278 $ hg showcopies --config experimental.copies.read-from=filelog-only
279 a -> j (sidedata missing-correct-output !)
279 a -> j
280 a -> j (no-sidedata !)
281 The entries should be written to extras even if they're empty (so the client
280 The entries should be written to extras even if they're empty (so the client
won't have to fall back to reading from filelogs)
  $ echo x >> j
#if extra
  $ hg ci -m 'modify j' --config experimental.copies.write-to=compatibility
  $ hg changesetcopies
  files: j
  filesadded:
  filesremoved:

  p1copies:
  p2copies:
#else
  $ hg ci -m 'modify j'
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 14
  '\x00\x00\x00\x01\x14\x00\x00\x00\x01\x00\x00\x00\x00j'
#endif

Test writing only to filelog

  $ hg cp a k
#if extra
  $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only

  $ hg changesetcopies
  files: k

#else
  $ hg ci -m 'copy a to k'
  $ hg debugsidedata -c -v -- -1
  1 sidedata entries
  entry-0014 size 24
  '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00ak'
#endif

  $ hg debugdata k 0
  \x01 (esc)
  copy: a
  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
  \x01 (esc)
  a
#if extra
  $ hg showcopies

  $ hg showcopies --config experimental.copies.read-from=compatibility
  a -> k
  $ hg showcopies --config experimental.copies.read-from=filelog-only
  a -> k
#else
  $ hg showcopies
  a -> k
#endif

  $ cd ..

Test rebasing a commit with copy information

  $ hg init rebase-rename
  $ cd rebase-rename
  $ echo a > a
  $ hg ci -Aqm 'add a'
  $ echo a2 > a
  $ hg ci -m 'modify a'
  $ hg co -q 0
  $ hg mv a b
  $ hg ci -qm 'rename a to b'
Not only do we want this to run in-memory, but it also shouldn't fall back to
on-disk merge (there are no conflicts), so we force it to be in-memory
with no fallback.
  $ hg rebase -d 1 --config rebase.experimental.inmemory=yes --config devel.rebase.force-in-memory-merge=yes
  rebasing 2:* tip "rename a to b" (glob)
  merging a and b to b
  saved backup bundle to $TESTTMP/rebase-rename/.hg/strip-backup/*-*-rebase.hg (glob)
  $ hg st --change . --copies
  A b
    a
  R a
  $ cd ..

Test splitting a commit

  $ hg init split
  $ cd split
  $ echo a > a
  $ echo b > b
  $ hg ci -Aqm 'add a and b'
  $ echo a2 > a
  $ hg mv b c
  $ hg ci -m 'modify a, move b to c'
  $ hg --config ui.interactive=yes split <<EOF
  > y
  > y
  > n
  > y
  > EOF
  diff --git a/a b/a
  1 hunks, 1 lines changed
  examine changes to 'a'?
  (enter ? for help) [Ynesfdaq?] y

  @@ -1,1 +1,1 @@
  -a
  +a2
  record this change to 'a'?
  (enter ? for help) [Ynesfdaq?] y

  diff --git a/b b/c
  rename from b
  rename to c
  examine changes to 'b' and 'c'?
  (enter ? for help) [Ynesfdaq?] n

  created new head
  diff --git a/b b/c
  rename from b
  rename to c
  examine changes to 'b' and 'c'?
  (enter ? for help) [Ynesfdaq?] y

  saved backup bundle to $TESTTMP/split/.hg/strip-backup/*-*-split.hg (glob)
  $ cd ..

Test committing half a rename

  $ hg init partial
  $ cd partial
  $ echo a > a
  $ hg ci -Aqm 'add a'
  $ hg mv a b
  $ hg ci -m 'remove a' a

#if sidedata

Test upgrading/downgrading to sidedata storage
==============================================

downgrading (keeping some sidedata)

  $ hg debugformat -v
  format-variant      repo config default
  fncache:             yes    yes     yes
  dotencode:           yes    yes     yes
  generaldelta:        yes    yes     yes
  share-safe:           no     no      no
  sparserevlog:        yes    yes     yes
  persistent-nodemap:   no     no      no
  copies-sdc:          yes    yes      no
  revlog-v2:           yes    yes      no
  plain-cl-delta:      yes    yes     yes
  compression:        zlib   zlib    zlib
  compression-level: default default default
  $ hg debugsidedata -c -- 0
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -c -- 1
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -m -- 0
  $ cat << EOF > .hg/hgrc
  > [format]
  > exp-use-side-data = yes
  > exp-use-copies-side-data-changeset = no
  > EOF
  $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
  $ hg debugformat -v
  format-variant      repo config default
  fncache:             yes    yes     yes
  dotencode:           yes    yes     yes
  generaldelta:        yes    yes     yes
  share-safe:           no     no      no
  sparserevlog:        yes    yes     yes
  persistent-nodemap:   no     no      no
  copies-sdc:           no     no      no
  revlog-v2:           yes    yes      no
  plain-cl-delta:      yes    yes     yes
  compression:        zlib   zlib    zlib
  compression-level: default default default
  $ hg debugsidedata -c -- 0
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -c -- 1
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -m -- 0

upgrading

  $ cat << EOF > .hg/hgrc
  > [format]
  > exp-use-copies-side-data-changeset = yes
  > EOF
  $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
  $ hg debugformat -v
  format-variant      repo config default
  fncache:             yes    yes     yes
  dotencode:           yes    yes     yes
  generaldelta:        yes    yes     yes
  share-safe:           no     no      no
  sparserevlog:        yes    yes     yes
  persistent-nodemap:   no     no      no
  copies-sdc:          yes    yes      no
  revlog-v2:           yes    yes      no
  plain-cl-delta:      yes    yes     yes
  compression:        zlib   zlib    zlib
  compression-level: default default default
  $ hg debugsidedata -c -- 0
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -c -- 1
  1 sidedata entries
  entry-0014 size 14
  $ hg debugsidedata -m -- 0

#endif

  $ cd ..
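
For reference, the entry-0014 dumps above can be decoded by hand. The
following is a minimal sketch assuming the layout inferred from those dumps
only (Mercurial's real parser lives elsewhere and may differ in detail): a
4-byte big-endian file count, then one 9-byte record per file (a 1-byte flag,
a 4-byte end offset into the trailing filename block, and a 4-byte copy-source
index), followed by the concatenated filenames. In these dumps, flags 0x06 and
0x07 accompany copy information.

import struct


def decode_files_sidedata(raw):
    # 4-byte big-endian file count
    (count,) = struct.unpack_from('>I', raw, 0)
    # concatenated filenames follow the fixed-size records
    names = raw[4 + 9 * count:]
    files = []
    prev_end = 0
    for i in range(count):
        # one record per file: flag, filename end offset, copy-source index
        flag, name_end, copy_idx = struct.unpack_from('>BII', raw, 4 + 9 * i)
        files.append((names[prev_end:name_end], flag, copy_idx))
        prev_end = name_end
    copies = {}
    for name, flag, copy_idx in files:
        if flag in (0x06, 0x07):  # flag values seen next to copy info above
            copies[name] = files[copy_idx][0]
    return copies


# The 24-byte entry recorded for "copy a to j" decodes to {b'j': b'a'}:
raw = (
    b'\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00'
    b'\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
)
assert decode_files_sidedata(raw) == {b'j': b'a'}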
@@ -1,88 +1,50 @@
# coding: utf8
# ext-sidedata-2.py - small extension to test (differently) the sidedata logic
#
# Simulates a client for a complex sidedata exchange.
#
# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import struct

from mercurial.revlogutils import sidedata as sidedatamod


def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
    sidedata = sidedata.copy()
    if text is None:
        text = revlog.revision(rev)
    sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
    return sidedata


def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
    sidedata = sidedata.copy()
    if text is None:
        text = revlog.revision(rev)
    sha256 = hashlib.sha256(text).digest()
    sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
    return sidedata


def reposetup(ui, repo):
    # Sidedata keys happen to be the same as the categories, easier for testing.
    for kind in (b'changelog', b'manifest', b'filelog'):
        repo.register_sidedata_computer(
            kind,
            sidedatamod.SD_TEST1,
            (sidedatamod.SD_TEST1,),
            compute_sidedata_1,
        )
        repo.register_sidedata_computer(
            kind,
            sidedatamod.SD_TEST2,
            (sidedatamod.SD_TEST2,),
            compute_sidedata_2,
        )
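
Both computers above store fixed-size values, so a reader can unpack them
directly. A minimal round-trip sketch using only the struct formats from the
extension ('>I' for the text length, '>32s' for the SHA-256 digest):

import hashlib
import struct

text = b'some revision text'
sd_test1 = struct.pack('>I', len(text))  # text length, 4 bytes
sd_test2 = struct.pack('>32s', hashlib.sha256(text).digest())  # 32 bytes

# Reading the values back, mirroring the checks in ext-sidedata.py below:
assert struct.unpack('>I', sd_test1)[0] == len(text)
assert struct.unpack('>32s', sd_test2)[0] == hashlib.sha256(text).digest()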
@@ -1,88 +1,88 @@
# coding: utf8
# ext-sidedata-3.py - small extension to test (differently still) the sidedata
# logic
#
# Simulates a client for a complex sidedata exchange.
#
# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import struct

from mercurial import (
    extensions,
    revlog,
)

from mercurial.revlogutils import sidedata as sidedatamod


def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
    sidedata = sidedata.copy()
    if text is None:
        text = revlog.revision(rev)
    sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
    return sidedata


def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
    sidedata = sidedata.copy()
    if text is None:
        text = revlog.revision(rev)
    sha256 = hashlib.sha256(text).digest()
    sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
    return sidedata


def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
    sidedata = sidedata.copy()
    if text is None:
        text = revlog.revision(rev)
    sha384 = hashlib.sha384(text).digest()
    sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
    return sidedata


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    if kwargs.get('sidedata') is None:
        kwargs['sidedata'] = {}
    sd = kwargs['sidedata']
    sd = compute_sidedata_1(None, self, None, sd, text=text)
    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def extsetup(ui):
    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)


def reposetup(ui, repo):
    # Sidedata keys happen to be the same as the categories, easier for testing.
    for kind in (b'changelog', b'manifest', b'filelog'):
        repo.register_sidedata_computer(
            kind,
            sidedatamod.SD_TEST1,
            (sidedatamod.SD_TEST1,),
            compute_sidedata_1,
        )
        repo.register_sidedata_computer(
            kind,
            sidedatamod.SD_TEST2,
            (sidedatamod.SD_TEST2,),
            compute_sidedata_2,
        )
        repo.register_sidedata_computer(
            kind,
            sidedatamod.SD_TEST3,
            (sidedatamod.SD_TEST3,),
            compute_sidedata_3,
        )
    repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
    repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
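
compute_sidedata_3 stores a SHA-384 digest, which is 48 bytes and therefore
matches its '>48s' pack format; note also that the extension registers a
computer for SD_TEST3 without declaring it as wanted, exercising the
computable-but-not-wanted case. A quick sanity check of the digest size, as a
sketch:

import hashlib
import struct

digest = hashlib.sha384(b'any revision text').digest()
assert len(digest) == 48  # SHA-384 produces 48-byte digests
assert struct.unpack('>48s', struct.pack('>48s', digest))[0] == digest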
@@ -1,88 +1,19 @@
# coding: utf8
# ext-sidedata-4.py - small extension to test (differently still) the sidedata
# logic
#
# Simulates a server for a complex sidedata exchange.
#
# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from mercurial.revlogutils import sidedata


def reposetup(ui, repo):
    repo.register_wanted_sidedata(sidedata.SD_TEST2)
    repo.register_wanted_sidedata(sidedata.SD_TEST3)
@@ -1,88 +1,96 @@
# ext-sidedata.py - small extension to test the sidedata logic
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import struct

from mercurial.node import (
    nullid,
    nullrev,
)
from mercurial import (
    extensions,
    requirements,
    revlog,
)

from mercurial.upgrade_utils import engine as upgrade_engine

from mercurial.revlogutils import sidedata


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    if kwargs.get('sidedata') is None:
        kwargs['sidedata'] = {}
    sd = kwargs['sidedata']
    ## let's store some arbitrary data just for testing
    # text length
    sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
    # and sha2 hashes
    sha256 = hashlib.sha256(text).digest()
    sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
    text, sd = orig(self, nodeorrev, *args, **kwargs)
    if getattr(self, 'sidedatanocheck', False):
        return text, sd
    if self.version & 0xFFFF != 2:
        return text, sd
    if nodeorrev != nullrev and nodeorrev != nullid:
        cat1 = sd.get(sidedata.SD_TEST1)
        if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
            raise RuntimeError('text size mismatch')
        expected = sd.get(sidedata.SD_TEST2)
        got = hashlib.sha256(text).digest()
        if expected is not None and got != expected:
            raise RuntimeError('sha256 mismatch')
    return text, sd


def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
    sidedatacompanion = orig(srcrepo, dstrepo)
    addedreqs = dstrepo.requirements - srcrepo.requirements
    if requirements.SIDEDATA_REQUIREMENT in addedreqs:
        assert sidedatacompanion is None  # deal with composition later

        def sidedatacompanion(revlog, rev):
            update = {}
            revlog.sidedatanocheck = True
            try:
                text = revlog.revision(rev)
            finally:
                del revlog.sidedatanocheck
            ## let's store some arbitrary data just for testing
            # text length
            update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
            # and sha2 hashes
            sha256 = hashlib.sha256(text).digest()
            update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
            return False, (), update, 0, 0

    return sidedatacompanion


def extsetup(ui):
    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
    extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
    extensions.wrapfunction(
        upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
    )


def reposetup(ui, repo):
    # We don't register sidedata computers because we don't care within these
    # tests
    repo.register_wanted_sidedata(sidedata.SD_TEST1)
    repo.register_wanted_sidedata(sidedata.SD_TEST2)
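
The checks in wrap_revisiondata above are deliberately tolerant: a category is
only verified when it is present, since a peer may legitimately send a subset
of the sidedata. The same pattern as a standalone sketch (the numeric keys are
hypothetical stand-ins for the real SD_TEST1/SD_TEST2 constants):

import hashlib
import struct

SD_TEST1 = 1  # hypothetical key values, for illustration only
SD_TEST2 = 2


def verify_text(text, sd):
    # Mirror wrap_revisiondata: only check the categories present in sd.
    cat1 = sd.get(SD_TEST1)
    if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
        raise RuntimeError('text size mismatch')
    expected = sd.get(SD_TEST2)
    if expected is not None and hashlib.sha256(text).digest() != expected:
        raise RuntimeError('sha256 mismatch')


# Passes: SD_TEST1 matches and SD_TEST2 is simply absent.
verify_text(b'abc', {SD_TEST1: struct.pack('>I', 3)})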