sidedata: replace sidedata upgrade mechanism with the new one...
Raphaël Gomès
r47847:27f1191b default
@@ -1,967 +1,922 b''
1 1 # coding: utf-8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from .node import nullrev
15 15 from . import (
16 16 error,
17 pycompat,
18 17 requirements as requirementsmod,
19 18 util,
20 19 )
21 20
22 21 from .revlogutils import (
23 22 constants as revlogconst,
24 23 flagutil as sidedataflag,
25 24 sidedata as sidedatamod,
26 25 )
27 26
28 27
29 28 class ChangingFiles(object):
30 29 """A class recording the changes made to files by a changeset
31 30
32 31 Actions performed on files are gathered into 3 sets:
33 32 Actions performed on files are gathered into the following sets:
34 33 - added: files actively added in the changeset.
35 34 - merged: files whose history got merged
36 35 - removed: files removed in the revision
37 36 - salvaged: files that might have been deleted by a merge but were not
38 37 - touched: files affected by the merge
39 38
40 39 and copies information is held by 2 mappings
41 40
42 41 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
43 42 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
44 43
45 44 See their inline help for details.
46 45 """
47 46
48 47 def __init__(
49 48 self,
50 49 touched=None,
51 50 added=None,
52 51 removed=None,
53 52 merged=None,
54 53 salvaged=None,
55 54 p1_copies=None,
56 55 p2_copies=None,
57 56 ):
58 57 self._added = set(() if added is None else added)
59 58 self._merged = set(() if merged is None else merged)
60 59 self._removed = set(() if removed is None else removed)
61 60 self._touched = set(() if touched is None else touched)
62 61 self._salvaged = set(() if salvaged is None else salvaged)
63 62 self._touched.update(self._added)
64 63 self._touched.update(self._merged)
65 64 self._touched.update(self._removed)
66 65 self._p1_copies = dict(() if p1_copies is None else p1_copies)
67 66 self._p2_copies = dict(() if p2_copies is None else p2_copies)
68 67
69 68 def __eq__(self, other):
70 69 return (
71 70 self.added == other.added
72 71 and self.merged == other.merged
73 72 and self.removed == other.removed
74 73 and self.salvaged == other.salvaged
75 74 and self.touched == other.touched
76 75 and self.copied_from_p1 == other.copied_from_p1
77 76 and self.copied_from_p2 == other.copied_from_p2
78 77 )
79 78
80 79 @property
81 80 def has_copies_info(self):
82 81 return bool(
83 82 self.removed
84 83 or self.merged
85 84 or self.salvaged
86 85 or self.copied_from_p1
87 86 or self.copied_from_p2
88 87 )
89 88
90 89 @util.propertycache
91 90 def added(self):
92 91 """files actively added in the changeset
93 92
94 93 Any file present in that revision that was absent in all the changeset's
95 94 parents.
96 95
97 96 In case of merge, this means a file absent in one of the parents but
98 97 existing in the other will *not* be contained in this set. (They were
99 98 added by an ancestor)
100 99 """
101 100 return frozenset(self._added)
102 101
103 102 def mark_added(self, filename):
104 103 if 'added' in vars(self):
105 104 del self.added
106 105 self._added.add(filename)
107 106 self.mark_touched(filename)
108 107
109 108 def update_added(self, filenames):
110 109 for f in filenames:
111 110 self.mark_added(f)
112 111
113 112 @util.propertycache
114 113 def merged(self):
115 114 """files actively merged during a merge
116 115
117 116 Any modified file which had modifications on both sides that needed merging.
118 117
119 118 In this case a new filenode was created and it has two parents.
120 119 """
121 120 return frozenset(self._merged)
122 121
123 122 def mark_merged(self, filename):
124 123 if 'merged' in vars(self):
125 124 del self.merged
126 125 self._merged.add(filename)
127 126 self.mark_touched(filename)
128 127
129 128 def update_merged(self, filenames):
130 129 for f in filenames:
131 130 self.mark_merged(f)
132 131
133 132 @util.propertycache
134 133 def removed(self):
135 134 """files actively removed by the changeset
136 135
137 136 In case of merge this will only contain the set of files removing "new"
138 137 content. For any file absent in the current changeset:
139 138
140 139 a) If the file exists in both parents, it is clearly "actively" removed
141 140 by this changeset.
142 141
143 142 b) If a file exists in only one parent and in none of the common
144 143 ancestors, then the file was newly added in one of the merged branches
145 144 and then got "actively" removed.
146 145
147 146 c) If a file exists in only one parent and at least one of the common
148 147 ancestors using the same filenode, then the file was unchanged on one
149 148 side and deleted on the other side. The merge "passively" propagated
150 149 that deletion, but didn't "actively" remove the file. In this case the
151 150 file is *not* included in the `removed` set.
152 151
153 152 d) If a file exists in only one parent and at least one of the common
154 153 ancestors using a different filenode, then the file was changed on one
155 154 side and removed on the other side. The merge process "actively"
156 155 decided to drop the new change and delete the file. Unlike in the
157 156 previous case, (c), the file is included in the `removed` set.
158 157
159 158 Summary table for merge:
160 159
161 160 case | exists in parents | exists in gca || removed
162 161 (a) | both | * || yes
163 162 (b) | one | none || yes
164 163 (c) | one | same filenode || no
165 164 (d) | one | new filenode || yes
166 165 """
167 166 return frozenset(self._removed)
168 167
169 168 def mark_removed(self, filename):
170 169 if 'removed' in vars(self):
171 170 del self.removed
172 171 self._removed.add(filename)
173 172 self.mark_touched(filename)
174 173
175 174 def update_removed(self, filenames):
176 175 for f in filenames:
177 176 self.mark_removed(f)
178 177
179 178 @util.propertycache
180 179 def salvaged(self):
181 180 """files that might have been deleted by a merge, but still exist.
182 181
183 182 During a merge, the manifest merging might select some files for
184 183 removal, or for a removed/changed conflict. If at commit time the file
185 184 still exists, its removal was "reverted" and the file is "salvaged"
186 185 """
187 186 return frozenset(self._salvaged)
188 187
189 188 def mark_salvaged(self, filename):
190 189 if "salvaged" in vars(self):
191 190 del self.salvaged
192 191 self._salvaged.add(filename)
193 192 self.mark_touched(filename)
194 193
195 194 def update_salvaged(self, filenames):
196 195 for f in filenames:
197 196 self.mark_salvaged(f)
198 197
199 198 @util.propertycache
200 199 def touched(self):
201 200 """files either actively modified, added or removed"""
202 201 return frozenset(self._touched)
203 202
204 203 def mark_touched(self, filename):
205 204 if 'touched' in vars(self):
206 205 del self.touched
207 206 self._touched.add(filename)
208 207
209 208 def update_touched(self, filenames):
210 209 for f in filenames:
211 210 self.mark_touched(f)
212 211
213 212 @util.propertycache
214 213 def copied_from_p1(self):
215 214 return self._p1_copies.copy()
216 215
217 216 def mark_copied_from_p1(self, source, dest):
218 217 if 'copied_from_p1' in vars(self):
219 218 del self.copied_from_p1
220 219 self._p1_copies[dest] = source
221 220
222 221 def update_copies_from_p1(self, copies):
223 222 for dest, source in copies.items():
224 223 self.mark_copied_from_p1(source, dest)
225 224
226 225 @util.propertycache
227 226 def copied_from_p2(self):
228 227 return self._p2_copies.copy()
229 228
230 229 def mark_copied_from_p2(self, source, dest):
231 230 if 'copied_from_p2' in vars(self):
232 231 del self.copied_from_p2
233 232 self._p2_copies[dest] = source
234 233
235 234 def update_copies_from_p2(self, copies):
236 235 for dest, source in copies.items():
237 236 self.mark_copied_from_p2(source, dest)
238 237
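The marking API above keeps the derived sets consistent: marking a file as added, merged, removed or salvaged also marks it as touched, and the cached frozensets are invalidated on each mark_* call. A minimal usage sketch, assuming mercurial is importable and using illustrative byte-string filenames:

from mercurial.metadata import ChangingFiles

cf = ChangingFiles()
cf.mark_added(b'docs/new.rst')
cf.mark_copied_from_p1(b'docs/old.rst', b'docs/new.rst')
cf.mark_removed(b'docs/old.rst')

assert b'docs/new.rst' in cf.added
assert b'docs/new.rst' in cf.touched          # added files are also touched
assert cf.copied_from_p1 == {b'docs/new.rst': b'docs/old.rst'}
assert cf.has_copies_info                     # removal and copy data are present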
239 238
240 239 def compute_all_files_changes(ctx):
241 240 """compute the files changed by a revision"""
242 241 p1 = ctx.p1()
243 242 p2 = ctx.p2()
244 243 if p1.rev() == nullrev and p2.rev() == nullrev:
245 244 return _process_root(ctx)
246 245 elif p1.rev() != nullrev and p2.rev() == nullrev:
247 246 return _process_linear(p1, ctx)
248 247 elif p1.rev() == nullrev and p2.rev() != nullrev:
249 248 # In the wild, one can encounter changesets where p1 is null but p2 is not
250 249 return _process_linear(p1, ctx, parent=2)
251 250 elif p1.rev() == p2.rev():
252 251 # In the wild, one can encounter such "non-merge" changesets where both parents are the same
253 252 return _process_linear(p1, ctx)
254 253 else:
255 254 return _process_merge(p1, p2, ctx)
256 255
257 256
258 257 def _process_root(ctx):
259 258 """compute the appropriate changed files for a changeset with no parents"""
260 259 # Simple, there was nothing before it, so everything is added.
261 260 md = ChangingFiles()
262 261 manifest = ctx.manifest()
263 262 for filename in manifest:
264 263 md.mark_added(filename)
265 264 return md
266 265
267 266
268 267 def _process_linear(parent_ctx, children_ctx, parent=1):
269 268 """compute the appropriate changed files for a changeset with a single parent"""
270 269 md = ChangingFiles()
271 270 parent_manifest = parent_ctx.manifest()
272 271 children_manifest = children_ctx.manifest()
273 272
274 273 copies_candidate = []
275 274
276 275 for filename, d in parent_manifest.diff(children_manifest).items():
277 276 if d[1][0] is None:
278 277 # no filenode for the "new" value, file is absent
279 278 md.mark_removed(filename)
280 279 else:
281 280 copies_candidate.append(filename)
282 281 if d[0][0] is None:
283 282 # no filenode for the "old" value, file was absent
284 283 md.mark_added(filename)
285 284 else:
286 285 # filenode for both "old" and "new"
287 286 md.mark_touched(filename)
288 287
289 288 if parent == 1:
290 289 copied = md.mark_copied_from_p1
291 290 elif parent == 2:
292 291 copied = md.mark_copied_from_p2
293 292 else:
294 293 assert False, "bad parent value %d" % parent
295 294
296 295 for filename in copies_candidate:
297 296 copy_info = children_ctx[filename].renamed()
298 297 if copy_info:
299 298 source, srcnode = copy_info
300 299 copied(source, filename)
301 300
302 301 return md
303 302
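The d values inspected above come from manifest.diff(), whose entries map a filename to a pair of (filenode, flags) tuples: the first for the "old" (parent) side, the second for the "new" (child) side, with None standing in for an absent file. A minimal sketch of the same dispatch, using an illustrative entry:

# hypothetical diff entry: file absent in the parent, present in the child
d = ((None, b''), (b'\x12' * 20, b''))

old_node, new_node = d[0][0], d[1][0]
if new_node is None:
    action = 'removed'      # no filenode on the "new" side
elif old_node is None:
    action = 'added'        # no filenode on the "old" side
else:
    action = 'touched'      # filenode present on both sides
assert action == 'added'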
304 303
305 304 def _process_merge(p1_ctx, p2_ctx, ctx):
306 305 """compute the appropriate changed files for a changeset with two parents
307 306
308 307 This is a more advanced case. The information we need to record is summarised
309 308 in the following table:
310 309
311 310 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
312 311 │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │
313 312 │ p2 ╲ p1 │ │ │ │ │
314 313 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
315 314 │ │ │🄱 No Changes │🄳 No Changes │ │
316 315 │ ø │🄰 No Changes │ OR │ OR │🄵 No Changes │
317 316 │ │ │🄲 Deleted[1] │🄴 Salvaged[2]│ [3] │
318 317 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
319 318 │ │🄶 No Changes │ │ │ │
320 319 │ (Some, None) │ OR │🄻 Deleted │ ø │ ø │
321 320 │ │🄷 Deleted[1] │ │ │ │
322 321 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
323 322 │ │🄸 No Changes │ │ │ 🄽 Touched │
324 323 │ (None, Some) │ OR │ ø │🄼 Added │OR 🅀 Salvaged │
325 324 │ │🄹 Salvaged[2]│ │ (copied?) │ (copied?) │
326 325 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
327 326 │ │ │ │ 🄾 Touched │ 🄿 Merged │
328 327 │ (Some, Some) │🄺 No Changes │ ø │OR 🅁 Salvaged │OR 🅂 Touched │
329 328 │ │ [3] │ │ (copied?) │ (copied?) │
330 329 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
331 330
332 331 Special case [1]:
333 332
334 333 The situation is:
335 334 - parent-A: file exists,
336 335 - parent-B: no file,
337 336 - working-copy: no file.
338 337
339 338 Detecting a "deletion" will depend on the presence of actual change on
340 339 the "parent-A" branch:
341 340
342 341 Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
343 342 compared to the merge ancestors, then parent-A branch left the file
344 343 untouched while parent-B deleted it. We simply apply the change from
345 344 the "parent-B" branch: the file was automatically dropped.
346 345 The result is:
347 346 - file is not recorded as touched by the merge.
348 347
349 348 Subcase 🄲 or 🄷 : otherwise, the changes from the parent-A branch were explicitly dropped and
350 349 the file was "deleted again". From a user perspective, the message
351 350 about "locally changed" while "remotely deleted" (or the other way
352 351 around) was issued and the user chose to delete the file.
353 352 The result:
354 353 - file is recorded as touched by the merge.
355 354
356 355
357 356 Special case [2]:
358 357
359 358 The situation is:
360 359 - parent-A: no file,
361 360 - parent-B: file,
362 361 - working-copy: file (same content as parent-B).
363 362
364 363 There are three subcases depending on the ancestors contents:
365 364
366 365 - A) the file is missing in all ancestors,
367 366 - B) at least one ancestor has the file with filenode ≠ from parent-B,
368 367 - C) all ancestors use the same filenode as parent-B,
369 368
370 369 Subcase (A) is the simplest: nothing happened on the parent-A side while
371 370 parent-B added it.
372 371
373 372 The result:
374 373 - the file is not marked as touched by the merge.
375 374
376 375 Subcase (B) is the counterpart of "Special case [1]": the file was
377 376 modified on parent-B side, while parent-A side deleted it. However this
378 377 time, the conflict was solved by keeping the file (and its
379 378 modification). We consider the file as "salvaged".
380 379
381 380 The result:
382 381 - the file is marked as "salvaged" by the merge.
383 382
384 383 Subcase (C) is a subtle variation of the case above. In this case, the
385 384 file is unchanged on the parent-B side and actively removed on the
386 385 parent-A side. So the merge machinery correctly decides it should be
387 386 removed. However, the file was explicitly restored to its parent-B
388 387 content before the merge was committed. The file is marked
389 388 as salvaged too. From the merge result perspective, this is similar to
390 389 Subcase (B), however from the merge resolution perspective they differ
391 390 since in (C), there was a conflict with no obvious solution to the
392 391 merge (that got reversed).
393 392
394 393 Special case [3]:
395 394
396 395 The situation is:
397 396 - parent-A: file,
398 397 - parent-B: file (different filenode than parent-A),
399 398 - working-copy: file (same filenode as parent-B).
400 399
401 400 This case is in theory much simpler: for this to happen, the filenode
402 401 in parent-B must purely replace the one in parent-A (either as a
403 402 descendant, or as a full new file history, see changeset). So the merge
404 403 introduces no changes, and the file is not affected by the merge...
405 404
406 405 However, in the wild it is possible to find commits where the above is not
407 406 true. For example, some repositories have commits where the *new* node is an
408 407 ancestor of the node in parent-A, or where parent-A and parent-B are two
409 408 branches of the same file history, yet no merge-filenode was created
410 409 (while the "merge" should have led to a "modification").
411 410 
412 411 Detecting such cases (and not recording the file as modified) would be a
413 412 nice bonus. However we do not do any of this yet.
414 413 """
415 414
416 415 repo = ctx.repo()
417 416 md = ChangingFiles()
418 417
419 418 m = ctx.manifest()
420 419 p1m = p1_ctx.manifest()
421 420 p2m = p2_ctx.manifest()
422 421 diff_p1 = p1m.diff(m)
423 422 diff_p2 = p2m.diff(m)
424 423
425 424 cahs = ctx.repo().changelog.commonancestorsheads(
426 425 p1_ctx.node(), p2_ctx.node()
427 426 )
428 427 if not cahs:
429 428 cahs = [nullrev]
430 429 mas = [ctx.repo()[r].manifest() for r in cahs]
431 430
432 431 copy_candidates = []
433 432
434 433 # Dealing with case 🄰 happens automatically. Since there is no entry in
435 434 # d1 nor d2, we won't iterate on it ever.
436 435
437 436 # Iteration over d1 content will deal with all cases, but the one in the
438 437 # first column of the table.
439 438 for filename, d1 in diff_p1.items():
440 439
441 440 d2 = diff_p2.pop(filename, None)
442 441
443 442 if d2 is None:
444 443 # this deals with the first line of the table.
445 444 _process_other_unchanged(md, mas, filename, d1)
446 445 else:
447 446
448 447 if d1[0][0] is None and d2[0][0] is None:
449 448 # case 🄼 — the file is absent from both parents: it was added.
450 449 md.mark_added(filename)
451 450 copy_candidates.append(filename)
452 451 elif d1[1][0] is None and d2[1][0] is None:
453 452 # case 🄻 — both deleted the file.
454 453 md.mark_removed(filename)
455 454 elif d1[1][0] is not None and d2[1][0] is not None:
456 455 if d1[0][0] is None or d2[0][0] is None:
457 456 if any(_find(ma, filename) is not None for ma in mas):
458 457 # case 🅀 or 🅁
459 458 md.mark_salvaged(filename)
460 459 else:
461 460 # case 🄽 🄾 : touched
462 461 md.mark_touched(filename)
463 462 else:
464 463 fctx = repo.filectx(filename, fileid=d1[1][0])
465 464 if fctx.p2().rev() == nullrev:
466 465 # case 🅂
467 466 # let's assume we can trust the file history. If the
468 467 # filenode is not a merge, the file was not merged.
469 468 md.mark_touched(filename)
470 469 else:
471 470 # case 🄿
472 471 md.mark_merged(filename)
473 472 copy_candidates.append(filename)
474 473 else:
475 474 # Impossible case, the post-merge file status cannot be None on
476 475 # one side and Something on the other side.
477 476 assert False, "unreachable"
478 477
479 478 # Iteration over remaining d2 content deals with the first column of the
480 479 # table.
481 480 for filename, d2 in diff_p2.items():
482 481 _process_other_unchanged(md, mas, filename, d2)
483 482
484 483 for filename in copy_candidates:
485 484 copy_info = ctx[filename].renamed()
486 485 if copy_info:
487 486 source, srcnode = copy_info
488 487 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
489 488 md.mark_copied_from_p1(source, filename)
490 489 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
491 490 md.mark_copied_from_p2(source, filename)
492 491 return md
493 492
494 493
495 494 def _find(manifest, filename):
496 495 """return the associated filenode or None"""
497 496 if filename not in manifest:
498 497 return None
499 498 return manifest.find(filename)[0]
500 499
501 500
502 501 def _process_other_unchanged(md, mas, filename, diff):
503 502 source_node = diff[0][0]
504 503 target_node = diff[1][0]
505 504
506 505 if source_node is not None and target_node is None:
507 506 if any(not _find(ma, filename) == source_node for ma in mas):
508 507 # case 🄲 or 🄷
509 508 md.mark_removed(filename)
510 509 # else, we have case 🄱 or 🄶 : no change needs to be recorded
511 510 elif source_node is None and target_node is not None:
512 511 if any(_find(ma, filename) is not None for ma in mas):
513 512 # case 🄴 or 🄹
514 513 md.mark_salvaged(filename)
515 514 # else, we have case 🄳 or 🄸 : simple merge without intervention
516 515 elif source_node is not None and target_node is not None:
517 516 # case 🄵 or 🄺 : simple merge without intervention
518 517 #
519 518 # In the buggy case where source_node is not an ancestor of target_node,
520 519 # there should have been a new filenode created, recording this as
521 520 # "modified". We do not deal with this yet.
522 521 pass
523 522 else:
524 523 # An impossible case, the diff algorithm should not return an entry if the
525 524 # file is missing on both sides.
526 525 assert False, "unreachable"
527 526
528 527
529 528 def _missing_from_all_ancestors(mas, filename):
530 529 return all(_find(ma, filename) is None for ma in mas)
531 530
532 531
533 532 def computechangesetfilesadded(ctx):
534 533 """return the list of files added in a changeset"""
535 534 added = []
536 535 for f in ctx.files():
537 536 if not any(f in p for p in ctx.parents()):
538 537 added.append(f)
539 538 return added
540 539
541 540
542 541 def get_removal_filter(ctx, x=None):
543 542 """return a function to detect files "wrongly" detected as `removed`
544 543
545 544 When a file is removed relative to p1 in a merge, this
546 545 function determines whether the absence is due to a
547 546 deletion from a parent, or whether the merge commit
548 547 itself deletes the file. We decide this by doing a
549 548 simplified three way merge of the manifest entry for
550 549 the file. There are two ways we decide the merge
551 550 itself didn't delete a file:
552 551 - neither parent (nor the merge) contain the file
553 552 - exactly one parent contains the file, and that
554 553 parent has the same filelog entry as the merge
555 554 ancestor (or all of them if there are two). In other
556 555 words, that parent left the file unchanged while the
557 556 other one deleted it.
558 557 One way to think about this is that deleting a file is
559 558 similar to emptying it, so the list of changed files
560 559 should be similar either way. The computation
561 560 described above is not done directly in _filecommit
562 561 when creating the list of changed files, however
563 562 it does something very similar by comparing filelog
564 563 nodes.
565 564 """
566 565
567 566 if x is not None:
568 567 p1, p2, m1, m2 = x
569 568 else:
570 569 p1 = ctx.p1()
571 570 p2 = ctx.p2()
572 571 m1 = p1.manifest()
573 572 m2 = p2.manifest()
574 573
575 574 @util.cachefunc
576 575 def mas():
577 576 p1n = p1.node()
578 577 p2n = p2.node()
579 578 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
580 579 if not cahs:
581 580 cahs = [nullrev]
582 581 return [ctx.repo()[r].manifest() for r in cahs]
583 582
584 583 def deletionfromparent(f):
585 584 if f in m1:
586 585 return f not in m2 and all(
587 586 f in ma and ma.find(f) == m1.find(f) for ma in mas()
588 587 )
589 588 elif f in m2:
590 589 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
591 590 else:
592 591 return True
593 592
594 593 return deletionfromparent
595 594
596 595
597 596 def computechangesetfilesremoved(ctx):
598 597 """return the list of files removed in a changeset"""
599 598 removed = []
600 599 for f in ctx.files():
601 600 if f not in ctx:
602 601 removed.append(f)
603 602 if removed:
604 603 rf = get_removal_filter(ctx)
605 604 removed = [r for r in removed if not rf(r)]
606 605 return removed
607 606
608 607
609 608 def computechangesetfilesmerged(ctx):
610 609 """return the list of files merged in a changeset"""
611 610 merged = []
612 611 if len(ctx.parents()) < 2:
613 612 return merged
614 613 for f in ctx.files():
615 614 if f in ctx:
616 615 fctx = ctx[f]
617 616 parents = fctx._filelog.parents(fctx._filenode)
618 617 if parents[1] != ctx.repo().nullid:
619 618 merged.append(f)
620 619 return merged
621 620
622 621
623 622 def computechangesetcopies(ctx):
624 623 """return the copies data for a changeset
625 624
626 625 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
627 626 
628 627 Each dictionary is in the form: `{newname: oldname}`
629 628 """
630 629 p1copies = {}
631 630 p2copies = {}
632 631 p1 = ctx.p1()
633 632 p2 = ctx.p2()
634 633 narrowmatch = ctx._repo.narrowmatch()
635 634 for dst in ctx.files():
636 635 if not narrowmatch(dst) or dst not in ctx:
637 636 continue
638 637 copied = ctx[dst].renamed()
639 638 if not copied:
640 639 continue
641 640 src, srcnode = copied
642 641 if src in p1 and p1[src].filenode() == srcnode:
643 642 p1copies[dst] = src
644 643 elif src in p2 and p2[src].filenode() == srcnode:
645 644 p2copies[dst] = src
646 645 return p1copies, p2copies
647 646
648 647
649 648 def encodecopies(files, copies):
650 649 items = []
651 650 for i, dst in enumerate(files):
652 651 if dst in copies:
653 652 items.append(b'%d\0%s' % (i, copies[dst]))
654 653 if len(items) != len(copies):
655 654 raise error.ProgrammingError(
656 655 b'some copy targets missing from file list'
657 656 )
658 657 return b"\n".join(items)
659 658
660 659
661 660 def decodecopies(files, data):
662 661 try:
663 662 copies = {}
664 663 if not data:
665 664 return copies
666 665 for l in data.split(b'\n'):
667 666 strindex, src = l.split(b'\0')
668 667 i = int(strindex)
669 668 dst = files[i]
670 669 copies[dst] = src
671 670 return copies
672 671 except (ValueError, IndexError):
673 672 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
674 673 # used different syntax for the value.
675 674 return None
676 675
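The two helpers above store each copy as the index of its destination within the changeset's file list, followed by a NUL byte and the copy source, one entry per line. A small round trip, assuming both functions are in scope and using illustrative filenames:

files = [b'copied.txt', b'other.txt']
data = encodecopies(files, {b'copied.txt': b'orig.txt'})
assert data == b'0\x00orig.txt'          # "<file index>\0<copy source>"
assert decodecopies(files, data) == {b'copied.txt': b'orig.txt'}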
677 676
678 677 def encodefileindices(files, subset):
679 678 subset = set(subset)
680 679 indices = []
681 680 for i, f in enumerate(files):
682 681 if f in subset:
683 682 indices.append(b'%d' % i)
684 683 return b'\n'.join(indices)
685 684
686 685
687 686 def decodefileindices(files, data):
688 687 try:
689 688 subset = []
690 689 if not data:
691 690 return subset
692 691 for strindex in data.split(b'\n'):
693 692 i = int(strindex)
694 693 if i < 0 or i >= len(files):
695 694 return None
696 695 subset.append(files[i])
697 696 return subset
698 697 except (ValueError, IndexError):
699 698 # Perhaps someone had chosen the same key name (e.g. "added") and
700 699 # used different syntax for the value.
701 700 return None
702 701
703 702
704 703 # see mercurial/helptext/internals/revlogs.txt for details about the format
705 704
706 705 ACTION_MASK = int("111" "00", 2)
707 706 # note: an untouched file used as a copy source will appear as `000` for this mask.
708 707 ADDED_FLAG = int("001" "00", 2)
709 708 MERGED_FLAG = int("010" "00", 2)
710 709 REMOVED_FLAG = int("011" "00", 2)
711 710 SALVAGED_FLAG = int("100" "00", 2)
712 711 TOUCHED_FLAG = int("101" "00", 2)
713 712
714 713 COPIED_MASK = int("11", 2)
715 714 COPIED_FROM_P1_FLAG = int("10", 2)
716 715 COPIED_FROM_P2_FLAG = int("11", 2)
717 716
718 717 # structure is <flag><filename-end><copy-source>
719 718 INDEX_HEADER = struct.Struct(">L")
720 719 INDEX_ENTRY = struct.Struct(">bLL")
721 720
722 721
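Each index entry therefore packs one flag byte, the cumulative end offset of the filename inside the trailing name block, and the index of the copy source within the file list. A minimal sketch of a single packed entry, reusing the constants defined above with illustrative values:

entry = INDEX_ENTRY.pack(ADDED_FLAG | COPIED_FROM_P1_FLAG, 7, 0)
assert len(entry) == INDEX_ENTRY.size     # 1 flag byte + two 32-bit big-endian ints
flag, name_end, copy_idx = INDEX_ENTRY.unpack(entry)
assert flag & ACTION_MASK == ADDED_FLAG
assert flag & COPIED_MASK == COPIED_FROM_P1_FLAG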
723 722 def encode_files_sidedata(files):
724 723 all_files = set(files.touched)
725 724 all_files.update(files.copied_from_p1.values())
726 725 all_files.update(files.copied_from_p2.values())
727 726 all_files = sorted(all_files)
728 727 file_idx = {f: i for (i, f) in enumerate(all_files)}
729 728 file_idx[None] = 0
730 729
731 730 chunks = [INDEX_HEADER.pack(len(all_files))]
732 731
733 732 filename_length = 0
734 733 for f in all_files:
735 734 filename_size = len(f)
736 735 filename_length += filename_size
737 736 flag = 0
738 737 if f in files.added:
739 738 flag |= ADDED_FLAG
740 739 elif f in files.merged:
741 740 flag |= MERGED_FLAG
742 741 elif f in files.removed:
743 742 flag |= REMOVED_FLAG
744 743 elif f in files.salvaged:
745 744 flag |= SALVAGED_FLAG
746 745 elif f in files.touched:
747 746 flag |= TOUCHED_FLAG
748 747
749 748 copy = None
750 749 if f in files.copied_from_p1:
751 750 flag |= COPIED_FROM_P1_FLAG
752 751 copy = files.copied_from_p1.get(f)
753 752 elif f in files.copied_from_p2:
754 753 copy = files.copied_from_p2.get(f)
755 754 flag |= COPIED_FROM_P2_FLAG
756 755 copy_idx = file_idx[copy]
757 756 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
758 757 chunks.extend(all_files)
759 758 return {sidedatamod.SD_FILES: b''.join(chunks)}
760 759
761 760
762 761 def decode_files_sidedata(sidedata):
763 762 md = ChangingFiles()
764 763 raw = sidedata.get(sidedatamod.SD_FILES)
765 764
766 765 if raw is None:
767 766 return md
768 767
769 768 copies = []
770 769 all_files = []
771 770
772 771 assert len(raw) >= INDEX_HEADER.size
773 772 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
774 773
775 774 offset = INDEX_HEADER.size
776 775 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
777 776 file_offset_last = file_offset_base
778 777
779 778 assert len(raw) >= file_offset_base
780 779
781 780 for idx in range(total_files):
782 781 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
783 782 file_end += file_offset_base
784 783 filename = raw[file_offset_last:file_end]
785 784 filesize = file_end - file_offset_last
786 785 assert len(filename) == filesize
787 786 offset += INDEX_ENTRY.size
788 787 file_offset_last = file_end
789 788 all_files.append(filename)
790 789 if flag & ACTION_MASK == ADDED_FLAG:
791 790 md.mark_added(filename)
792 791 elif flag & ACTION_MASK == MERGED_FLAG:
793 792 md.mark_merged(filename)
794 793 elif flag & ACTION_MASK == REMOVED_FLAG:
795 794 md.mark_removed(filename)
796 795 elif flag & ACTION_MASK == SALVAGED_FLAG:
797 796 md.mark_salvaged(filename)
798 797 elif flag & ACTION_MASK == TOUCHED_FLAG:
799 798 md.mark_touched(filename)
800 799
801 800 copied = None
802 801 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
803 802 copied = md.mark_copied_from_p1
804 803 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
805 804 copied = md.mark_copied_from_p2
806 805
807 806 if copied is not None:
808 807 copies.append((copied, filename, copy_idx))
809 808
810 809 for copied, filename, copy_idx in copies:
811 810 copied(all_files[copy_idx], filename)
812 811
813 812 return md
814 813
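Taken together, encode_files_sidedata and decode_files_sidedata are inverses over a ChangingFiles instance. A minimal round-trip sketch, assuming mercurial is importable and using illustrative filenames:

from mercurial import metadata

files = metadata.ChangingFiles()
files.mark_added(b'new.txt')
files.mark_removed(b'old.txt')
files.mark_copied_from_p1(b'src.txt', b'new.txt')

blob = metadata.encode_files_sidedata(files)   # {SD_FILES: <binary entry block>}
assert metadata.decode_files_sidedata(blob) == files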
815 814
816 815 def _getsidedata(srcrepo, rev):
817 816 ctx = srcrepo[rev]
818 817 files = compute_all_files_changes(ctx)
819 818 return encode_files_sidedata(files), files.has_copies_info
820 819
821 820
822 821 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
823 822 sidedata, has_copies_info = _getsidedata(repo, rev)
824 823 flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
825 824 return sidedata, (flags_to_add, 0)
826 825
827 826
828 827 def set_sidedata_spec_for_repo(repo):
829 828 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
830 829 repo.register_wanted_sidedata(sidedatamod.SD_FILES)
831 830 repo.register_sidedata_computer(
832 831 revlogconst.KIND_CHANGELOG,
833 832 sidedatamod.SD_FILES,
834 833 (sidedatamod.SD_FILES,),
835 834 copies_sidedata_computer,
836 835 sidedataflag.REVIDX_HASCOPIESINFO,
837 836 )
838 837
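A sidedata computer, as registered above, is called with (repo, revlog, rev, existing_sidedata) and returns the new sidedata mapping together with a (flags_to_add, flags_to_remove) pair, mirroring copies_sidedata_computer. A minimal sketch of a computer following that contract (hypothetical name, not part of this module):

def noop_sidedata_computer(repo, revlog, rev, existing_sidedata):
    # keep whatever sidedata is already present and leave the flags untouched
    return existing_sidedata, (0, 0)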
839 838
840 def getsidedataadder(srcrepo, destrepo):
841 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
842 if pycompat.iswindows or not use_w:
843 return _get_simple_sidedata_adder(srcrepo, destrepo)
844 else:
845 return _get_worker_sidedata_adder(srcrepo, destrepo)
846
847
848 839 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
849 840 """The function used by worker precomputing sidedata
850 841
851 842 It reads an input queue containing revision numbers.
852 843 It writes to an output queue containing (rev, <sidedata-map>) pairs.
853 844
854 845 The `None` input value is used as a stop signal.
855 846
856 847 The `tokens` semaphore is used to avoid having too many unprocessed
857 848 entries. The workers need to acquire one token before fetching a task.
858 849 They will be released by the consumer of the produced data.
859 850 """
860 851 tokens.acquire()
861 852 rev = revs_queue.get()
862 853 while rev is not None:
863 854 data = _getsidedata(srcrepo, rev)
864 855 sidedata_queue.put((rev, data))
865 856 tokens.acquire()
866 857 rev = revs_queue.get()
867 858 # processing of `None` is completed, release the token.
868 859 tokens.release()
869 860
870 861
871 862 BUFF_PER_WORKER = 50
872 863
873 864
874 865 def _get_worker_sidedata_adder(srcrepo, destrepo):
875 866 """The parallel version of the sidedata computation
876 867
877 868 This code spawns a pool of workers that precompute a buffer of sidedata
878 869 before we actually need it"""
879 870 # avoid circular import copies -> scmutil -> worker -> copies
880 871 from . import worker
881 872
882 873 nbworkers = worker._numworkers(srcrepo.ui)
883 874
884 875 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
885 876 revsq = multiprocessing.Queue()
886 877 sidedataq = multiprocessing.Queue()
887 878
888 879 assert srcrepo.filtername is None
889 880 # queue all tasks beforehand, revision numbers are small and it makes
890 881 # synchronisation simpler
891 882 #
892 883 # Since the computation for each node can be quite expensive, the overhead
893 884 # of using a single queue is not relevant. In practice, most computations
894 885 # are fast but some are very expensive and dominate all the other smaller
895 886 # costs.
896 887 for r in srcrepo.changelog.revs():
897 888 revsq.put(r)
898 889 # queue the "no more tasks" markers
899 890 for i in range(nbworkers):
900 891 revsq.put(None)
901 892
902 893 allworkers = []
903 894 for i in range(nbworkers):
904 895 args = (srcrepo, revsq, sidedataq, tokens)
905 896 w = multiprocessing.Process(target=_sidedata_worker, args=args)
906 897 allworkers.append(w)
907 898 w.start()
908 899
909 900 # dictionary to store results for revisions higher than the one we are
910 901 # looking for. For example, if we need the sidedata map for 42, and 43 is
911 902 # received, we shelve 43 for later use.
912 903 staging = {}
913 904
914 def sidedata_companion(revlog, rev):
915 data = {}, False
916 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
917 # Is the data previously shelved ?
918 data = staging.pop(rev, None)
919 if data is None:
920 # look at the queued result until we find the one we are lookig
921 # for (shelve the other ones)
905 def sidedata_companion(repo, revlog, rev, old_sidedata):
906 # Is the data previously shelved ?
907 data = staging.pop(rev, None)
908 if data is None:
909 # look at the queued result until we find the one we are looking
910 # for (shelve the other ones)
911 r, data = sidedataq.get()
912 while r != rev:
913 staging[r] = data
922 914 r, data = sidedataq.get()
923 while r != rev:
924 staging[r] = data
925 r, data = sidedataq.get()
926 tokens.release()
915 tokens.release()
927 916 sidedata, has_copies_info = data
928 917 new_flag = 0
929 918 if has_copies_info:
930 919 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
931 return False, (), sidedata, new_flag, 0
920 return sidedata, (new_flag, 0)
932 921
933 922 return sidedata_companion
934
935
936 def _get_simple_sidedata_adder(srcrepo, destrepo):
937 """The simple version of the sidedata computation
938
939 It just compute it in the same thread on request"""
940
941 def sidedatacompanion(revlog, rev):
942 sidedata, has_copies_info = {}, False
943 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
944 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
945 new_flag = 0
946 if has_copies_info:
947 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
948
949 return False, (), sidedata, new_flag, 0
950
951 return sidedatacompanion
952
953
954 def getsidedataremover(srcrepo, destrepo):
955 def sidedatacompanion(revlog, rev):
956 f = ()
957 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
958 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
959 f = (
960 sidedatamod.SD_P1COPIES,
961 sidedatamod.SD_P2COPIES,
962 sidedatamod.SD_FILESADDED,
963 sidedatamod.SD_FILESREMOVED,
964 )
965 return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
966
967 return sidedatacompanion
@@ -1,3145 +1,3129 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88
88 89 REVLOGV0
89 90 REVLOGV1
90 91 REVLOGV2
91 92 FLAG_INLINE_DATA
92 93 FLAG_GENERALDELTA
93 94 REVLOG_DEFAULT_FLAGS
94 95 REVLOG_DEFAULT_FORMAT
95 96 REVLOG_DEFAULT_VERSION
96 97 REVLOGV1_FLAGS
97 98 REVLOGV2_FLAGS
98 99 REVIDX_ISCENSORED
99 100 REVIDX_ELLIPSIS
100 101 REVIDX_HASCOPIESINFO
101 102 REVIDX_EXTSTORED
102 103 REVIDX_DEFAULT_FLAGS
103 104 REVIDX_FLAGS_ORDER
104 105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 106
106 107 parsers = policy.importmod('parsers')
107 108 rustancestor = policy.importrust('ancestor')
108 109 rustdagop = policy.importrust('dagop')
109 110 rustrevlog = policy.importrust('revlog')
110 111
111 112 # Aliased for performance.
112 113 _zlibdecompress = zlib.decompress
113 114
114 115 # max size of revlog with inline data
115 116 _maxinline = 131072
116 117 _chunksize = 1048576
117 118
118 119 # Flag processors for REVIDX_ELLIPSIS.
119 120 def ellipsisreadprocessor(rl, text):
120 121 return text, False
121 122
122 123
123 124 def ellipsiswriteprocessor(rl, text):
124 125 return text, False
125 126
126 127
127 128 def ellipsisrawprocessor(rl, text):
128 129 return False
129 130
130 131
131 132 ellipsisprocessor = (
132 133 ellipsisreadprocessor,
133 134 ellipsiswriteprocessor,
134 135 ellipsisrawprocessor,
135 136 )
136 137
137 138
138 139 def offset_type(offset, type):
139 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 141 raise ValueError(b'unknown revlog index flags')
141 142 return int(int(offset) << 16 | type)
142 143
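offset_type packs a revision's data offset and its 16-bit flag field into the single integer stored as the first field of an index entry; start() and flags() further down recover the two halves. A minimal sketch with illustrative numbers:

packed = offset_type(4096, REVIDX_ISCENSORED)   # offset in the high bits, flags low
assert packed >> 16 == 4096                     # what revlog.start() computes
assert packed & 0xFFFF == REVIDX_ISCENSORED     # what revlog.flags() computes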
143 144
144 145 def _verify_revision(rl, skipflags, state, node):
145 146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 147 point for extensions to influence the operation."""
147 148 if skipflags:
148 149 state[b'skipread'].add(node)
149 150 else:
150 151 # Side-effect: read content and verify hash.
151 152 rl.revision(node)
152 153
153 154
154 155 # True if a fast implementation for persistent-nodemap is available
155 156 #
156 157 # We also consider we have a "fast" implementation in "pure" python because
157 158 # people using pure don't really have performance consideration (and a
158 159 # wheelbarrow of other slowness source)
159 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 161 parsers, 'BaseIndexObject'
161 162 )
162 163
163 164
164 165 @attr.s(slots=True, frozen=True)
165 166 class _revisioninfo(object):
166 167 """Information about a revision that allows building its fulltext
167 168 node: expected hash of the revision
168 169 p1, p2: parent revs of the revision
169 170 btext: built text cache consisting of a one-element list
170 171 cachedelta: (baserev, uncompressed_delta) or None
171 172 flags: flags associated to the revision storage
172 173
173 174 One of btext[0] or cachedelta must be set.
174 175 """
175 176
176 177 node = attr.ib()
177 178 p1 = attr.ib()
178 179 p2 = attr.ib()
179 180 btext = attr.ib()
180 181 textlen = attr.ib()
181 182 cachedelta = attr.ib()
182 183 flags = attr.ib()
183 184
184 185
185 186 @interfaceutil.implementer(repository.irevisiondelta)
186 187 @attr.s(slots=True)
187 188 class revlogrevisiondelta(object):
188 189 node = attr.ib()
189 190 p1node = attr.ib()
190 191 p2node = attr.ib()
191 192 basenode = attr.ib()
192 193 flags = attr.ib()
193 194 baserevisionsize = attr.ib()
194 195 revision = attr.ib()
195 196 delta = attr.ib()
196 197 sidedata = attr.ib()
197 198 protocol_flags = attr.ib()
198 199 linknode = attr.ib(default=None)
199 200
200 201
201 202 @interfaceutil.implementer(repository.iverifyproblem)
202 203 @attr.s(frozen=True)
203 204 class revlogproblem(object):
204 205 warning = attr.ib(default=None)
205 206 error = attr.ib(default=None)
206 207 node = attr.ib(default=None)
207 208
208 209
209 210 def parse_index_v1(data, inline):
210 211 # call the C implementation to parse the index data
211 212 index, cache = parsers.parse_index2(data, inline)
212 213 return index, cache
213 214
214 215
215 216 def parse_index_v2(data, inline):
216 217 # call the C implementation to parse the index data
217 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 219 return index, cache
219 220
220 221
221 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 223
223 224 def parse_index_v1_nodemap(data, inline):
224 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 226 return index, cache
226 227
227 228
228 229 else:
229 230 parse_index_v1_nodemap = None
230 231
231 232
232 233 def parse_index_v1_mixed(data, inline):
233 234 index, cache = parse_index_v1(data, inline)
234 235 return rustrevlog.MixedIndex(index), cache
235 236
236 237
237 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 239 # signed integer)
239 240 _maxentrysize = 0x7FFFFFFF
240 241
241 242
242 243 class revlog(object):
243 244 """
244 245 the underlying revision storage object
245 246
246 247 A revlog consists of two parts, an index and the revision data.
247 248
248 249 The index is a file with a fixed record size containing
249 250 information on each revision, including its nodeid (hash), the
250 251 nodeids of its parents, the position and offset of its data within
251 252 the data file, and the revision it's based on. Finally, each entry
252 253 contains a linkrev entry that can serve as a pointer to external
253 254 data.
254 255
255 256 The revision data itself is a linear collection of data chunks.
256 257 Each chunk represents a revision and is usually represented as a
257 258 delta against the previous chunk. To bound lookup time, runs of
258 259 deltas are limited to about 2 times the length of the original
259 260 version data. This makes retrieval of a version proportional to
260 261 its size, or O(1) relative to the number of revisions.
261 262
262 263 Both pieces of the revlog are written to in an append-only
263 264 fashion, which means we never need to rewrite a file to insert or
264 265 remove data, and can use some simple techniques to avoid the need
265 266 for locking while reading.
266 267
267 268 If checkambig, indexfile is opened with checkambig=True at
268 269 writing, to avoid file stat ambiguity.
269 270
270 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 272 index will be mmapped rather than read if it is larger than the
272 273 configured threshold.
273 274
274 275 If censorable is True, the revlog can have censored revisions.
275 276
276 277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 278 compression for the data content.
278 279
279 280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 281 file handle, a filename, and an expected position. It should check whether
281 282 the current position in the file handle is valid, and log/warn/fail (by
282 283 raising).
283 284 """
284 285
285 286 _flagserrorclass = error.RevlogError
286 287
287 288 def __init__(
288 289 self,
289 290 opener,
290 291 target,
291 292 indexfile=None,
292 293 datafile=None,
293 294 checkambig=False,
294 295 mmaplargeindex=False,
295 296 censorable=False,
296 297 upperboundcomp=None,
297 298 persistentnodemap=False,
298 299 concurrencychecker=None,
299 300 ):
300 301 """
301 302 create a revlog object
302 303
303 304 opener is a function that abstracts the file opening operation
304 305 and can be used to implement COW semantics or the like.
305 306
306 307 `target`: a (KIND, ID) tuple that identifies the content stored in
307 308 this revlog. It helps the rest of the code understand what the revlog
308 309 is about without having to resort to heuristics and index filename
309 310 analysis. Note that this must reliably be set by normal code, but
310 311 test, debug, or performance measurement code might not set this to an
311 312 accurate value.
312 313 """
313 314 self.upperboundcomp = upperboundcomp
314 315 self.indexfile = indexfile
315 316 self.datafile = datafile or (indexfile[:-2] + b".d")
316 317 self.nodemap_file = None
317 318 if persistentnodemap:
318 319 self.nodemap_file = nodemaputil.get_nodemap_file(
319 320 opener, self.indexfile
320 321 )
321 322
322 323 self.opener = opener
323 324 assert target[0] in ALL_KINDS
324 325 assert len(target) == 2
325 326 self.target = target
326 327 # When True, indexfile is opened with checkambig=True at writing, to
327 328 # avoid file stat ambiguity.
328 329 self._checkambig = checkambig
329 330 self._mmaplargeindex = mmaplargeindex
330 331 self._censorable = censorable
331 332 # 3-tuple of (node, rev, text) for a raw revision.
332 333 self._revisioncache = None
333 334 # Maps rev to chain base rev.
334 335 self._chainbasecache = util.lrucachedict(100)
335 336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
336 337 self._chunkcache = (0, b'')
337 338 # How much data to read and cache into the raw revlog data cache.
338 339 self._chunkcachesize = 65536
339 340 self._maxchainlen = None
340 341 self._deltabothparents = True
341 342 self.index = None
342 343 self._nodemap_docket = None
343 344 # Mapping of partial identifiers to full nodes.
344 345 self._pcache = {}
345 346 # Mapping of revision integer to full node.
346 347 self._compengine = b'zlib'
347 348 self._compengineopts = {}
348 349 self._maxdeltachainspan = -1
349 350 self._withsparseread = False
350 351 self._sparserevlog = False
351 352 self._srdensitythreshold = 0.50
352 353 self._srmingapsize = 262144
353 354
354 355 # Make copy of flag processors so each revlog instance can support
355 356 # custom flags.
356 357 self._flagprocessors = dict(flagutil.flagprocessors)
357 358
358 359 # 2-tuple of file handles being used for active writing.
359 360 self._writinghandles = None
360 361
361 362 self._loadindex()
362 363
363 364 self._concurrencychecker = concurrencychecker
364 365
365 366 def _loadindex(self):
366 367 mmapindexthreshold = None
367 368 opts = self.opener.options
368 369
369 370 if b'revlogv2' in opts:
370 371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
371 372 elif b'revlogv1' in opts:
372 373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
373 374 if b'generaldelta' in opts:
374 375 newversionflags |= FLAG_GENERALDELTA
375 376 elif b'revlogv0' in self.opener.options:
376 377 newversionflags = REVLOGV0
377 378 else:
378 379 newversionflags = REVLOG_DEFAULT_VERSION
379 380
380 381 if b'chunkcachesize' in opts:
381 382 self._chunkcachesize = opts[b'chunkcachesize']
382 383 if b'maxchainlen' in opts:
383 384 self._maxchainlen = opts[b'maxchainlen']
384 385 if b'deltabothparents' in opts:
385 386 self._deltabothparents = opts[b'deltabothparents']
386 387 self._lazydelta = bool(opts.get(b'lazydelta', True))
387 388 self._lazydeltabase = False
388 389 if self._lazydelta:
389 390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
390 391 if b'compengine' in opts:
391 392 self._compengine = opts[b'compengine']
392 393 if b'zlib.level' in opts:
393 394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
394 395 if b'zstd.level' in opts:
395 396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
396 397 if b'maxdeltachainspan' in opts:
397 398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
398 399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
399 400 mmapindexthreshold = opts[b'mmapindexthreshold']
400 401 self.hassidedata = bool(opts.get(b'side-data', False))
401 402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
402 403 withsparseread = bool(opts.get(b'with-sparse-read', False))
403 404 # sparse-revlog forces sparse-read
404 405 self._withsparseread = self._sparserevlog or withsparseread
405 406 if b'sparse-read-density-threshold' in opts:
406 407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
407 408 if b'sparse-read-min-gap-size' in opts:
408 409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
409 410 if opts.get(b'enableellipsis'):
410 411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
411 412
412 413 # revlog v0 doesn't have flag processors
413 414 for flag, processor in pycompat.iteritems(
414 415 opts.get(b'flagprocessors', {})
415 416 ):
416 417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
417 418
418 419 if self._chunkcachesize <= 0:
419 420 raise error.RevlogError(
420 421 _(b'revlog chunk cache size %r is not greater than 0')
421 422 % self._chunkcachesize
422 423 )
423 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 425 raise error.RevlogError(
425 426 _(b'revlog chunk cache size %r is not a power of 2')
426 427 % self._chunkcachesize
427 428 )
428 429
429 430 indexdata = b''
430 431 self._initempty = True
431 432 try:
432 433 with self._indexfp() as f:
433 434 if (
434 435 mmapindexthreshold is not None
435 436 and self.opener.fstat(f).st_size >= mmapindexthreshold
436 437 ):
437 438 # TODO: should .close() to release resources without
438 439 # relying on Python GC
439 440 indexdata = util.buffer(util.mmapread(f))
440 441 else:
441 442 indexdata = f.read()
442 443 if len(indexdata) > 0:
443 444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
444 445 self._initempty = False
445 446 else:
446 447 versionflags = newversionflags
447 448 except IOError as inst:
448 449 if inst.errno != errno.ENOENT:
449 450 raise
450 451
451 452 versionflags = newversionflags
452 453
453 454 self.version = versionflags
454 455
455 456 flags = versionflags & ~0xFFFF
456 457 fmt = versionflags & 0xFFFF
457 458
458 459 if fmt == REVLOGV0:
459 460 if flags:
460 461 raise error.RevlogError(
461 462 _(b'unknown flags (%#04x) in version %d revlog %s')
462 463 % (flags >> 16, fmt, self.indexfile)
463 464 )
464 465
465 466 self._inline = False
466 467 self._generaldelta = False
467 468
468 469 elif fmt == REVLOGV1:
469 470 if flags & ~REVLOGV1_FLAGS:
470 471 raise error.RevlogError(
471 472 _(b'unknown flags (%#04x) in version %d revlog %s')
472 473 % (flags >> 16, fmt, self.indexfile)
473 474 )
474 475
475 476 self._inline = versionflags & FLAG_INLINE_DATA
476 477 self._generaldelta = versionflags & FLAG_GENERALDELTA
477 478
478 479 elif fmt == REVLOGV2:
479 480 if flags & ~REVLOGV2_FLAGS:
480 481 raise error.RevlogError(
481 482 _(b'unknown flags (%#04x) in version %d revlog %s')
482 483 % (flags >> 16, fmt, self.indexfile)
483 484 )
484 485
485 486 # There is a bug in the transaction handling when going from an
486 487 # inline revlog to a separate index and data file. Turn it off until
487 488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
488 489 # See issue6485
489 490 self._inline = False
490 491 # generaldelta implied by version 2 revlogs.
491 492 self._generaldelta = True
492 493
493 494 else:
494 495 raise error.RevlogError(
495 496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
496 497 )
497 498
498 499 self.nodeconstants = sha1nodeconstants
499 500 self.nullid = self.nodeconstants.nullid
500 501
501 502 # sparse-revlog can't be on without general-delta (issue6056)
502 503 if not self._generaldelta:
503 504 self._sparserevlog = False
504 505
505 506 self._storedeltachains = True
506 507
507 508 devel_nodemap = (
508 509 self.nodemap_file
509 510 and opts.get(b'devel-force-nodemap', False)
510 511 and parse_index_v1_nodemap is not None
511 512 )
512 513
513 514 use_rust_index = False
514 515 if rustrevlog is not None:
515 516 if self.nodemap_file is not None:
516 517 use_rust_index = True
517 518 else:
518 519 use_rust_index = self.opener.options.get(b'rust.index')
519 520
520 521 self._parse_index = parse_index_v1
521 522 if self.version == REVLOGV0:
522 523 self._parse_index = revlogv0.parse_index_v0
523 524 elif fmt == REVLOGV2:
524 525 self._parse_index = parse_index_v2
525 526 elif devel_nodemap:
526 527 self._parse_index = parse_index_v1_nodemap
527 528 elif use_rust_index:
528 529 self._parse_index = parse_index_v1_mixed
529 530 try:
530 531 d = self._parse_index(indexdata, self._inline)
531 532 index, _chunkcache = d
532 533 use_nodemap = (
533 534 not self._inline
534 535 and self.nodemap_file is not None
535 536 and util.safehasattr(index, 'update_nodemap_data')
536 537 )
537 538 if use_nodemap:
538 539 nodemap_data = nodemaputil.persisted_data(self)
539 540 if nodemap_data is not None:
540 541 docket = nodemap_data[0]
541 542 if (
542 543 len(d[0]) > docket.tip_rev
543 544 and d[0][docket.tip_rev][7] == docket.tip_node
544 545 ):
545 546 # no changelog tampering
546 547 self._nodemap_docket = docket
547 548 index.update_nodemap_data(*nodemap_data)
548 549 except (ValueError, IndexError):
549 550 raise error.RevlogError(
550 551 _(b"index %s is corrupted") % self.indexfile
551 552 )
552 553 self.index, self._chunkcache = d
553 554 if not self._chunkcache:
554 555 self._chunkclear()
555 556 # revnum -> (chain-length, sum-delta-length)
556 557 self._chaininfocache = util.lrucachedict(500)
557 558 # revlog header -> revlog compressor
558 559 self._decompressors = {}
559 560
560 561 @util.propertycache
561 562 def revlog_kind(self):
562 563 return self.target[0]
563 564
564 565 @util.propertycache
565 566 def _compressor(self):
566 567 engine = util.compengines[self._compengine]
567 568 return engine.revlogcompressor(self._compengineopts)
568 569
569 570 def _indexfp(self, mode=b'r'):
570 571 """file object for the revlog's index file"""
571 572 args = {'mode': mode}
572 573 if mode != b'r':
573 574 args['checkambig'] = self._checkambig
574 575 if mode == b'w':
575 576 args['atomictemp'] = True
576 577 return self.opener(self.indexfile, **args)
577 578
578 579 def _datafp(self, mode=b'r'):
579 580 """file object for the revlog's data file"""
580 581 return self.opener(self.datafile, mode=mode)
581 582
582 583 @contextlib.contextmanager
583 584 def _datareadfp(self, existingfp=None):
584 585 """file object suitable to read data"""
585 586 # Use explicit file handle, if given.
586 587 if existingfp is not None:
587 588 yield existingfp
588 589
589 590 # Use a file handle being actively used for writes, if available.
590 591 # There is some danger to doing this because reads will seek the
591 592 # file. However, _writeentry() performs a SEEK_END before all writes,
592 593 # so we should be safe.
593 594 elif self._writinghandles:
594 595 if self._inline:
595 596 yield self._writinghandles[0]
596 597 else:
597 598 yield self._writinghandles[1]
598 599
599 600 # Otherwise open a new file handle.
600 601 else:
601 602 if self._inline:
602 603 func = self._indexfp
603 604 else:
604 605 func = self._datafp
605 606 with func() as fp:
606 607 yield fp
607 608
608 609 def tiprev(self):
609 610 return len(self.index) - 1
610 611
611 612 def tip(self):
612 613 return self.node(self.tiprev())
613 614
614 615 def __contains__(self, rev):
615 616 return 0 <= rev < len(self)
616 617
617 618 def __len__(self):
618 619 return len(self.index)
619 620
620 621 def __iter__(self):
621 622 return iter(pycompat.xrange(len(self)))
622 623
623 624 def revs(self, start=0, stop=None):
624 625 """iterate over all rev in this revlog (from start to stop)"""
625 626 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 627
627 628 @property
628 629 def nodemap(self):
629 630 msg = (
630 631 b"revlog.nodemap is deprecated, "
631 632 b"use revlog.index.[has_node|rev|get_rev]"
632 633 )
633 634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 635 return self.index.nodemap
635 636
636 637 @property
637 638 def _nodecache(self):
638 639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 641 return self.index.nodemap
641 642
642 643 def hasnode(self, node):
643 644 try:
644 645 self.rev(node)
645 646 return True
646 647 except KeyError:
647 648 return False
648 649
649 650 def candelta(self, baserev, rev):
650 651 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 652 # Disable delta if either rev requires a content-changing flag
652 653 # processor (ex. LFS). This is because such a flag processor can alter
653 654 # the rawtext content that the delta will be based on, and two clients
654 655 # could have the same revlog node with different flags (i.e. different
655 656 # rawtext contents) and the delta could be incompatible.
656 657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 659 ):
659 660 return False
660 661 return True
661 662
662 663 def update_caches(self, transaction):
663 664 if self.nodemap_file is not None:
664 665 if transaction is None:
665 666 nodemaputil.update_persistent_nodemap(self)
666 667 else:
667 668 nodemaputil.setup_persistent_nodemap(transaction, self)
668 669
669 670 def clearcaches(self):
670 671 self._revisioncache = None
671 672 self._chainbasecache.clear()
672 673 self._chunkcache = (0, b'')
673 674 self._pcache = {}
674 675 self._nodemap_docket = None
675 676 self.index.clearcaches()
676 677 # The Python code is the one responsible for validating the docket, so
677 678 # we end up having to refresh it here.
678 679 use_nodemap = (
679 680 not self._inline
680 681 and self.nodemap_file is not None
681 682 and util.safehasattr(self.index, 'update_nodemap_data')
682 683 )
683 684 if use_nodemap:
684 685 nodemap_data = nodemaputil.persisted_data(self)
685 686 if nodemap_data is not None:
686 687 self._nodemap_docket = nodemap_data[0]
687 688 self.index.update_nodemap_data(*nodemap_data)
688 689
689 690 def rev(self, node):
690 691 try:
691 692 return self.index.rev(node)
692 693 except TypeError:
693 694 raise
694 695 except error.RevlogError:
695 696 # parsers.c radix tree lookup failed
696 697 if (
697 698 node == self.nodeconstants.wdirid
698 699 or node in self.nodeconstants.wdirfilenodeids
699 700 ):
700 701 raise error.WdirUnsupported
701 702 raise error.LookupError(node, self.indexfile, _(b'no node'))
702 703
703 704 # Accessors for index entries.
704 705
705 706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 707 # are flags.
707 708 def start(self, rev):
708 709 return int(self.index[rev][0] >> 16)
709 710
710 711 def flags(self, rev):
711 712 return self.index[rev][0] & 0xFFFF
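# A minimal standalone sketch of the packing described in the comment above:
# the first index-entry field stores the data offset in its high bits and the
# flags in its low 16 bits. The helper name is hypothetical; the real code
# uses its own offset_type() helper for this.
def sketch_pack_offset_flags(offset, flags):
    return (offset << 16) | (flags & 0xFFFF)

packed = sketch_pack_offset_flags(1024, 0x1)
assert packed >> 16 == 1024     # what start() extracts
assert packed & 0xFFFF == 0x1   # what flags() extracts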
712 713
713 714 def length(self, rev):
714 715 return self.index[rev][1]
715 716
716 717 def sidedata_length(self, rev):
717 718 if self.version & 0xFFFF != REVLOGV2:
718 719 return 0
719 720 return self.index[rev][9]
720 721
721 722 def rawsize(self, rev):
722 723 """return the length of the uncompressed text for a given revision"""
723 724 l = self.index[rev][2]
724 725 if l >= 0:
725 726 return l
726 727
727 728 t = self.rawdata(rev)
728 729 return len(t)
729 730
730 731 def size(self, rev):
731 732 """length of non-raw text (processed by a "read" flag processor)"""
732 733 # fast path: if no "read" flag processor could change the content,
733 734 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 735 flags = self.flags(rev)
735 736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 737 return self.rawsize(rev)
737 738
738 739 return len(self.revision(rev, raw=False))
739 740
740 741 def chainbase(self, rev):
741 742 base = self._chainbasecache.get(rev)
742 743 if base is not None:
743 744 return base
744 745
745 746 index = self.index
746 747 iterrev = rev
747 748 base = index[iterrev][3]
748 749 while base != iterrev:
749 750 iterrev = base
750 751 base = index[iterrev][3]
751 752
752 753 self._chainbasecache[rev] = base
753 754 return base
754 755
755 756 def linkrev(self, rev):
756 757 return self.index[rev][4]
757 758
758 759 def parentrevs(self, rev):
759 760 try:
760 761 entry = self.index[rev]
761 762 except IndexError:
762 763 if rev == wdirrev:
763 764 raise error.WdirUnsupported
764 765 raise
765 766 if entry[5] == nullrev:
766 767 return entry[6], entry[5]
767 768 else:
768 769 return entry[5], entry[6]
769 770
770 771 # fast parentrevs(rev) where rev isn't filtered
771 772 _uncheckedparentrevs = parentrevs
772 773
773 774 def node(self, rev):
774 775 try:
775 776 return self.index[rev][7]
776 777 except IndexError:
777 778 if rev == wdirrev:
778 779 raise error.WdirUnsupported
779 780 raise
780 781
781 782 # Derived from index values.
782 783
783 784 def end(self, rev):
784 785 return self.start(rev) + self.length(rev)
785 786
786 787 def parents(self, node):
787 788 i = self.index
788 789 d = i[self.rev(node)]
789 790 # inline node() to avoid function call overhead
790 791 if d[5] == self.nullid:
791 792 return i[d[6]][7], i[d[5]][7]
792 793 else:
793 794 return i[d[5]][7], i[d[6]][7]
794 795
795 796 def chainlen(self, rev):
796 797 return self._chaininfo(rev)[0]
797 798
798 799 def _chaininfo(self, rev):
799 800 chaininfocache = self._chaininfocache
800 801 if rev in chaininfocache:
801 802 return chaininfocache[rev]
802 803 index = self.index
803 804 generaldelta = self._generaldelta
804 805 iterrev = rev
805 806 e = index[iterrev]
806 807 clen = 0
807 808 compresseddeltalen = 0
808 809 while iterrev != e[3]:
809 810 clen += 1
810 811 compresseddeltalen += e[1]
811 812 if generaldelta:
812 813 iterrev = e[3]
813 814 else:
814 815 iterrev -= 1
815 816 if iterrev in chaininfocache:
816 817 t = chaininfocache[iterrev]
817 818 clen += t[0]
818 819 compresseddeltalen += t[1]
819 820 break
820 821 e = index[iterrev]
821 822 else:
822 823 # Add text length of base since decompressing that also takes
823 824 # work. For cache hits the length is already included.
824 825 compresseddeltalen += e[1]
825 826 r = (clen, compresseddeltalen)
826 827 chaininfocache[rev] = r
827 828 return r
828 829
829 830 def _deltachain(self, rev, stoprev=None):
830 831 """Obtain the delta chain for a revision.
831 832
832 833 ``stoprev`` specifies a revision to stop at. If not specified, we
833 834 stop at the base of the chain.
834 835
835 836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 837 revs in ascending order and ``stopped`` is a bool indicating whether
837 838 ``stoprev`` was hit.
838 839 """
839 840 # Try C implementation.
840 841 try:
841 842 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 843 except AttributeError:
843 844 pass
844 845
845 846 chain = []
846 847
847 848 # Alias to prevent attribute lookup in tight loop.
848 849 index = self.index
849 850 generaldelta = self._generaldelta
850 851
851 852 iterrev = rev
852 853 e = index[iterrev]
853 854 while iterrev != e[3] and iterrev != stoprev:
854 855 chain.append(iterrev)
855 856 if generaldelta:
856 857 iterrev = e[3]
857 858 else:
858 859 iterrev -= 1
859 860 e = index[iterrev]
860 861
861 862 if iterrev == stoprev:
862 863 stopped = True
863 864 else:
864 865 chain.append(iterrev)
865 866 stopped = False
866 867
867 868 chain.reverse()
868 869 return chain, stopped
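# A minimal pure-Python sketch of the chain walk above, using a toy "index"
# where each entry is just its delta-base revision (entry[3] in the real
# index). A revision whose base is itself stores a full text and terminates
# the chain; this sketch ignores stoprev and the non-generaldelta case.
def sketch_deltachain(bases, rev):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev]
    chain.append(rev)  # the full-text base
    chain.reverse()
    return chain

# revs 0 and 3 store full texts; 1 and 2 delta onto their bases, 4 onto 3
assert sketch_deltachain([0, 0, 1, 3, 3], 2) == [0, 1, 2]
assert sketch_deltachain([0, 0, 1, 3, 3], 4) == [3, 4]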
869 870
870 871 def ancestors(self, revs, stoprev=0, inclusive=False):
871 872 """Generate the ancestors of 'revs' in reverse revision order.
872 873 Does not generate revs lower than stoprev.
873 874
874 875 See the documentation for ancestor.lazyancestors for more details."""
875 876
876 877 # first, make sure start revisions aren't filtered
877 878 revs = list(revs)
878 879 checkrev = self.node
879 880 for r in revs:
880 881 checkrev(r)
881 882 # and we're sure ancestors aren't filtered as well
882 883
883 884 if rustancestor is not None:
884 885 lazyancestors = rustancestor.LazyAncestors
885 886 arg = self.index
886 887 else:
887 888 lazyancestors = ancestor.lazyancestors
888 889 arg = self._uncheckedparentrevs
889 890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
890 891
891 892 def descendants(self, revs):
892 893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
893 894
894 895 def findcommonmissing(self, common=None, heads=None):
895 896 """Return a tuple of the ancestors of common and the ancestors of heads
896 897 that are not ancestors of common. In revset terminology, we return the
897 898 tuple:
898 899
899 900 ::common, (::heads) - (::common)
900 901
901 902 The list is sorted by revision number, meaning it is
902 903 topologically sorted.
903 904
904 905 'heads' and 'common' are both lists of node IDs. If heads is
905 906 not supplied, uses all of the revlog's heads. If common is not
906 907 supplied, uses nullid."""
907 908 if common is None:
908 909 common = [self.nullid]
909 910 if heads is None:
910 911 heads = self.heads()
911 912
912 913 common = [self.rev(n) for n in common]
913 914 heads = [self.rev(n) for n in heads]
914 915
915 916 # we want the ancestors, but inclusive
916 917 class lazyset(object):
917 918 def __init__(self, lazyvalues):
918 919 self.addedvalues = set()
919 920 self.lazyvalues = lazyvalues
920 921
921 922 def __contains__(self, value):
922 923 return value in self.addedvalues or value in self.lazyvalues
923 924
924 925 def __iter__(self):
925 926 added = self.addedvalues
926 927 for r in added:
927 928 yield r
928 929 for r in self.lazyvalues:
929 930 if not r in added:
930 931 yield r
931 932
932 933 def add(self, value):
933 934 self.addedvalues.add(value)
934 935
935 936 def update(self, values):
936 937 self.addedvalues.update(values)
937 938
938 939 has = lazyset(self.ancestors(common))
939 940 has.add(nullrev)
940 941 has.update(common)
941 942
942 943 # take all ancestors from heads that aren't in has
943 944 missing = set()
944 945 visit = collections.deque(r for r in heads if r not in has)
945 946 while visit:
946 947 r = visit.popleft()
947 948 if r in missing:
948 949 continue
949 950 else:
950 951 missing.add(r)
951 952 for p in self.parentrevs(r):
952 953 if p not in has:
953 954 visit.append(p)
954 955 missing = list(missing)
955 956 missing.sort()
956 957 return has, [self.node(miss) for miss in missing]
957 958
958 959 def incrementalmissingrevs(self, common=None):
959 960 """Return an object that can be used to incrementally compute the
960 961 revision numbers of the ancestors of arbitrary sets that are not
961 962 ancestors of common. This is an ancestor.incrementalmissingancestors
962 963 object.
963 964
964 965 'common' is a list of revision numbers. If common is not supplied, uses
965 966 nullrev.
966 967 """
967 968 if common is None:
968 969 common = [nullrev]
969 970
970 971 if rustancestor is not None:
971 972 return rustancestor.MissingAncestors(self.index, common)
972 973 return ancestor.incrementalmissingancestors(self.parentrevs, common)
973 974
974 975 def findmissingrevs(self, common=None, heads=None):
975 976 """Return the revision numbers of the ancestors of heads that
976 977 are not ancestors of common.
977 978
978 979 More specifically, return a list of revision numbers corresponding to
979 980 nodes N such that every N satisfies the following constraints:
980 981
981 982 1. N is an ancestor of some node in 'heads'
982 983 2. N is not an ancestor of any node in 'common'
983 984
984 985 The list is sorted by revision number, meaning it is
985 986 topologically sorted.
986 987
987 988 'heads' and 'common' are both lists of revision numbers. If heads is
988 989 not supplied, uses all of the revlog's heads. If common is not
989 990 supplied, uses nullid."""
990 991 if common is None:
991 992 common = [nullrev]
992 993 if heads is None:
993 994 heads = self.headrevs()
994 995
995 996 inc = self.incrementalmissingrevs(common=common)
996 997 return inc.missingancestors(heads)
997 998
998 999 def findmissing(self, common=None, heads=None):
999 1000 """Return the ancestors of heads that are not ancestors of common.
1000 1001
1001 1002 More specifically, return a list of nodes N such that every N
1002 1003 satisfies the following constraints:
1003 1004
1004 1005 1. N is an ancestor of some node in 'heads'
1005 1006 2. N is not an ancestor of any node in 'common'
1006 1007
1007 1008 The list is sorted by revision number, meaning it is
1008 1009 topologically sorted.
1009 1010
1010 1011 'heads' and 'common' are both lists of node IDs. If heads is
1011 1012 not supplied, uses all of the revlog's heads. If common is not
1012 1013 supplied, uses nullid."""
1013 1014 if common is None:
1014 1015 common = [self.nullid]
1015 1016 if heads is None:
1016 1017 heads = self.heads()
1017 1018
1018 1019 common = [self.rev(n) for n in common]
1019 1020 heads = [self.rev(n) for n in heads]
1020 1021
1021 1022 inc = self.incrementalmissingrevs(common=common)
1022 1023 return [self.node(r) for r in inc.missingancestors(heads)]
1023 1024
1024 1025 def nodesbetween(self, roots=None, heads=None):
1025 1026 """Return a topological path from 'roots' to 'heads'.
1026 1027
1027 1028 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1028 1029 topologically sorted list of all nodes N that satisfy both of
1029 1030 these constraints:
1030 1031
1031 1032 1. N is a descendant of some node in 'roots'
1032 1033 2. N is an ancestor of some node in 'heads'
1033 1034
1034 1035 Every node is considered to be both a descendant and an ancestor
1035 1036 of itself, so every reachable node in 'roots' and 'heads' will be
1036 1037 included in 'nodes'.
1037 1038
1038 1039 'outroots' is the list of reachable nodes in 'roots', i.e., the
1039 1040 subset of 'roots' that is returned in 'nodes'. Likewise,
1040 1041 'outheads' is the subset of 'heads' that is also in 'nodes'.
1041 1042
1042 1043 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1043 1044 unspecified, uses nullid as the only root. If 'heads' is
1044 1045 unspecified, uses list of all of the revlog's heads."""
1045 1046 nonodes = ([], [], [])
1046 1047 if roots is not None:
1047 1048 roots = list(roots)
1048 1049 if not roots:
1049 1050 return nonodes
1050 1051 lowestrev = min([self.rev(n) for n in roots])
1051 1052 else:
1052 1053 roots = [self.nullid] # Everybody's a descendant of nullid
1053 1054 lowestrev = nullrev
1054 1055 if (lowestrev == nullrev) and (heads is None):
1055 1056 # We want _all_ the nodes!
1056 1057 return (
1057 1058 [self.node(r) for r in self],
1058 1059 [self.nullid],
1059 1060 list(self.heads()),
1060 1061 )
1061 1062 if heads is None:
1062 1063 # All nodes are ancestors, so the latest ancestor is the last
1063 1064 # node.
1064 1065 highestrev = len(self) - 1
1065 1066 # Set ancestors to None to signal that every node is an ancestor.
1066 1067 ancestors = None
1067 1068 # Set heads to an empty dictionary for later discovery of heads
1068 1069 heads = {}
1069 1070 else:
1070 1071 heads = list(heads)
1071 1072 if not heads:
1072 1073 return nonodes
1073 1074 ancestors = set()
1074 1075 # Turn heads into a dictionary so we can remove 'fake' heads.
1075 1076 # Also, later we will be using it to filter out the heads we can't
1076 1077 # find from roots.
1077 1078 heads = dict.fromkeys(heads, False)
1078 1079 # Start at the top and keep marking parents until we're done.
1079 1080 nodestotag = set(heads)
1080 1081 # Remember where the top was so we can use it as a limit later.
1081 1082 highestrev = max([self.rev(n) for n in nodestotag])
1082 1083 while nodestotag:
1083 1084 # grab a node to tag
1084 1085 n = nodestotag.pop()
1085 1086 # Never tag nullid
1086 1087 if n == self.nullid:
1087 1088 continue
1088 1089 # A node's revision number represents its place in a
1089 1090 # topologically sorted list of nodes.
1090 1091 r = self.rev(n)
1091 1092 if r >= lowestrev:
1092 1093 if n not in ancestors:
1093 1094 # If we are possibly a descendant of one of the roots
1094 1095 # and we haven't already been marked as an ancestor
1095 1096 ancestors.add(n) # Mark as ancestor
1096 1097 # Add non-nullid parents to list of nodes to tag.
1097 1098 nodestotag.update(
1098 1099 [p for p in self.parents(n) if p != self.nullid]
1099 1100 )
1100 1101 elif n in heads: # We've seen it before, is it a fake head?
1101 1102 # So it is, real heads should not be the ancestors of
1102 1103 # any other heads.
1103 1104 heads.pop(n)
1104 1105 if not ancestors:
1105 1106 return nonodes
1106 1107 # Now that we have our set of ancestors, we want to remove any
1107 1108 # roots that are not ancestors.
1108 1109
1109 1110 # If one of the roots was nullid, everything is included anyway.
1110 1111 if lowestrev > nullrev:
1111 1112 # But, since we weren't, let's recompute the lowest rev to not
1112 1113 # include roots that aren't ancestors.
1113 1114
1114 1115 # Filter out roots that aren't ancestors of heads
1115 1116 roots = [root for root in roots if root in ancestors]
1116 1117 # Recompute the lowest revision
1117 1118 if roots:
1118 1119 lowestrev = min([self.rev(root) for root in roots])
1119 1120 else:
1120 1121 # No more roots? Return empty list
1121 1122 return nonodes
1122 1123 else:
1123 1124 # We are descending from nullid, and don't need to care about
1124 1125 # any other roots.
1125 1126 lowestrev = nullrev
1126 1127 roots = [self.nullid]
1127 1128 # Transform our roots list into a set.
1128 1129 descendants = set(roots)
1129 1130 # Also, keep the original roots so we can filter out roots that aren't
1130 1131 # 'real' roots (i.e. are descended from other roots).
1131 1132 roots = descendants.copy()
1132 1133 # Our topologically sorted list of output nodes.
1133 1134 orderedout = []
1134 1135 # Don't start at nullid since we don't want nullid in our output list,
1135 1136 # and if nullid shows up in descendants, empty parents will look like
1136 1137 # they're descendants.
1137 1138 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1138 1139 n = self.node(r)
1139 1140 isdescendant = False
1140 1141 if lowestrev == nullrev: # Everybody is a descendant of nullid
1141 1142 isdescendant = True
1142 1143 elif n in descendants:
1143 1144 # n is already a descendant
1144 1145 isdescendant = True
1145 1146 # This check only needs to be done here because all the roots
1146 1147 # will start being marked as descendants before the loop.
1147 1148 if n in roots:
1148 1149 # If n was a root, check if it's a 'real' root.
1149 1150 p = tuple(self.parents(n))
1150 1151 # If any of its parents are descendants, it's not a root.
1151 1152 if (p[0] in descendants) or (p[1] in descendants):
1152 1153 roots.remove(n)
1153 1154 else:
1154 1155 p = tuple(self.parents(n))
1155 1156 # A node is a descendant if either of its parents is a
1156 1157 # descendant. (We seeded the descendants set with the roots
1157 1158 # up there, remember?)
1158 1159 if (p[0] in descendants) or (p[1] in descendants):
1159 1160 descendants.add(n)
1160 1161 isdescendant = True
1161 1162 if isdescendant and ((ancestors is None) or (n in ancestors)):
1162 1163 # Only include nodes that are both descendants and ancestors.
1163 1164 orderedout.append(n)
1164 1165 if (ancestors is not None) and (n in heads):
1165 1166 # We're trying to figure out which heads are reachable
1166 1167 # from roots.
1167 1168 # Mark this head as having been reached
1168 1169 heads[n] = True
1169 1170 elif ancestors is None:
1170 1171 # Otherwise, we're trying to discover the heads.
1171 1172 # Assume this is a head because if it isn't, the next step
1172 1173 # will eventually remove it.
1173 1174 heads[n] = True
1174 1175 # But, obviously its parents aren't.
1175 1176 for p in self.parents(n):
1176 1177 heads.pop(p, None)
1177 1178 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1178 1179 roots = list(roots)
1179 1180 assert orderedout
1180 1181 assert roots
1181 1182 assert heads
1182 1183 return (orderedout, roots, heads)
1183 1184
1184 1185 def headrevs(self, revs=None):
1185 1186 if revs is None:
1186 1187 try:
1187 1188 return self.index.headrevs()
1188 1189 except AttributeError:
1189 1190 return self._headrevs()
1190 1191 if rustdagop is not None:
1191 1192 return rustdagop.headrevs(self.index, revs)
1192 1193 return dagop.headrevs(revs, self._uncheckedparentrevs)
1193 1194
1194 1195 def computephases(self, roots):
1195 1196 return self.index.computephasesmapsets(roots)
1196 1197
1197 1198 def _headrevs(self):
1198 1199 count = len(self)
1199 1200 if not count:
1200 1201 return [nullrev]
1201 1202 # we won't iter over filtered rev so nobody is a head at start
1202 1203 ishead = [0] * (count + 1)
1203 1204 index = self.index
1204 1205 for r in self:
1205 1206 ishead[r] = 1 # I may be a head
1206 1207 e = index[r]
1207 1208 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1208 1209 return [r for r, val in enumerate(ishead) if val]
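# A simplified sketch of the same "a head is a revision that is nobody's
# parent" idea used by _headrevs above, on a toy parent table where -1 plays
# the role of nullrev (the real code also has to skip filtered revisions).
def sketch_headrevs(parents):
    ishead = [True] * len(parents)
    for p1, p2 in parents:
        for p in (p1, p2):
            if p != -1:
                ishead[p] = False
    return [r for r, flag in enumerate(ishead) if flag]

# 0 <- 1 <- 2 and 0 <- 3: revisions 2 and 3 are the heads
assert sketch_headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]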
1209 1210
1210 1211 def heads(self, start=None, stop=None):
1211 1212 """return the list of all nodes that have no children
1212 1213
1213 1214 if start is specified, only heads that are descendants of
1214 1215 start will be returned
1215 1216 if stop is specified, it will consider all the revs from stop
1216 1217 as if they had no children
1217 1218 """
1218 1219 if start is None and stop is None:
1219 1220 if not len(self):
1220 1221 return [self.nullid]
1221 1222 return [self.node(r) for r in self.headrevs()]
1222 1223
1223 1224 if start is None:
1224 1225 start = nullrev
1225 1226 else:
1226 1227 start = self.rev(start)
1227 1228
1228 1229 stoprevs = {self.rev(n) for n in stop or []}
1229 1230
1230 1231 revs = dagop.headrevssubset(
1231 1232 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1232 1233 )
1233 1234
1234 1235 return [self.node(rev) for rev in revs]
1235 1236
1236 1237 def children(self, node):
1237 1238 """find the children of a given node"""
1238 1239 c = []
1239 1240 p = self.rev(node)
1240 1241 for r in self.revs(start=p + 1):
1241 1242 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1242 1243 if prevs:
1243 1244 for pr in prevs:
1244 1245 if pr == p:
1245 1246 c.append(self.node(r))
1246 1247 elif p == nullrev:
1247 1248 c.append(self.node(r))
1248 1249 return c
1249 1250
1250 1251 def commonancestorsheads(self, a, b):
1251 1252 """calculate all the heads of the common ancestors of nodes a and b"""
1252 1253 a, b = self.rev(a), self.rev(b)
1253 1254 ancs = self._commonancestorsheads(a, b)
1254 1255 return pycompat.maplist(self.node, ancs)
1255 1256
1256 1257 def _commonancestorsheads(self, *revs):
1257 1258 """calculate all the heads of the common ancestors of revs"""
1258 1259 try:
1259 1260 ancs = self.index.commonancestorsheads(*revs)
1260 1261 except (AttributeError, OverflowError): # C implementation failed
1261 1262 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1262 1263 return ancs
1263 1264
1264 1265 def isancestor(self, a, b):
1265 1266 """return True if node a is an ancestor of node b
1266 1267
1267 1268 A revision is considered an ancestor of itself."""
1268 1269 a, b = self.rev(a), self.rev(b)
1269 1270 return self.isancestorrev(a, b)
1270 1271
1271 1272 def isancestorrev(self, a, b):
1272 1273 """return True if revision a is an ancestor of revision b
1273 1274
1274 1275 A revision is considered an ancestor of itself.
1275 1276
1276 1277 The implementation of this is trivial but the use of
1277 1278 reachableroots is not."""
1278 1279 if a == nullrev:
1279 1280 return True
1280 1281 elif a == b:
1281 1282 return True
1282 1283 elif a > b:
1283 1284 return False
1284 1285 return bool(self.reachableroots(a, [b], [a], includepath=False))
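# A plain-Python sketch of the ancestry test above, assuming a parentrevs(rev)
# callable like the one defined earlier in this class. It leans on the revlog
# invariant that parents always have smaller revision numbers than their
# children; the real code delegates to reachableroots for performance.
def sketch_isancestorrev(parentrevs, a, b):
    if a == -1:  # nullrev is an ancestor of everything
        return True
    seen = set()
    stack = [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r < a or r in seen:  # once below a, a can no longer be reached
            continue
        seen.add(r)
        stack.extend(p for p in parentrevs(r) if p != -1)
    return False

toyparents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}.__getitem__
assert sketch_isancestorrev(toyparents, 0, 2)
assert not sketch_isancestorrev(toyparents, 3, 2)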
1285 1286
1286 1287 def reachableroots(self, minroot, heads, roots, includepath=False):
1287 1288 """return (heads(::(<roots> and <roots>::<heads>)))
1288 1289
1289 1290 If includepath is True, return (<roots>::<heads>)."""
1290 1291 try:
1291 1292 return self.index.reachableroots2(
1292 1293 minroot, heads, roots, includepath
1293 1294 )
1294 1295 except AttributeError:
1295 1296 return dagop._reachablerootspure(
1296 1297 self.parentrevs, minroot, roots, heads, includepath
1297 1298 )
1298 1299
1299 1300 def ancestor(self, a, b):
1300 1301 """calculate the "best" common ancestor of nodes a and b"""
1301 1302
1302 1303 a, b = self.rev(a), self.rev(b)
1303 1304 try:
1304 1305 ancs = self.index.ancestors(a, b)
1305 1306 except (AttributeError, OverflowError):
1306 1307 ancs = ancestor.ancestors(self.parentrevs, a, b)
1307 1308 if ancs:
1308 1309 # choose a consistent winner when there's a tie
1309 1310 return min(map(self.node, ancs))
1310 1311 return self.nullid
1311 1312
1312 1313 def _match(self, id):
1313 1314 if isinstance(id, int):
1314 1315 # rev
1315 1316 return self.node(id)
1316 1317 if len(id) == self.nodeconstants.nodelen:
1317 1318 # possibly a binary node
1318 1319 # odds of a binary node being all hex in ASCII are 1 in 10**25
1319 1320 try:
1320 1321 node = id
1321 1322 self.rev(node) # quick search the index
1322 1323 return node
1323 1324 except error.LookupError:
1324 1325 pass # may be partial hex id
1325 1326 try:
1326 1327 # str(rev)
1327 1328 rev = int(id)
1328 1329 if b"%d" % rev != id:
1329 1330 raise ValueError
1330 1331 if rev < 0:
1331 1332 rev = len(self) + rev
1332 1333 if rev < 0 or rev >= len(self):
1333 1334 raise ValueError
1334 1335 return self.node(rev)
1335 1336 except (ValueError, OverflowError):
1336 1337 pass
1337 1338 if len(id) == 2 * self.nodeconstants.nodelen:
1338 1339 try:
1339 1340 # a full hex nodeid?
1340 1341 node = bin(id)
1341 1342 self.rev(node)
1342 1343 return node
1343 1344 except (TypeError, error.LookupError):
1344 1345 pass
1345 1346
1346 1347 def _partialmatch(self, id):
1347 1348 # we don't care about wdirfilenodeids as they should always be full hashes
1348 1349 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1349 1350 try:
1350 1351 partial = self.index.partialmatch(id)
1351 1352 if partial and self.hasnode(partial):
1352 1353 if maybewdir:
1353 1354 # single 'ff...' match in radix tree, ambiguous with wdir
1354 1355 raise error.RevlogError
1355 1356 return partial
1356 1357 if maybewdir:
1357 1358 # no 'ff...' match in radix tree, wdir identified
1358 1359 raise error.WdirUnsupported
1359 1360 return None
1360 1361 except error.RevlogError:
1361 1362 # parsers.c radix tree lookup gave multiple matches
1362 1363 # fast path: for unfiltered changelog, radix tree is accurate
1363 1364 if not getattr(self, 'filteredrevs', None):
1364 1365 raise error.AmbiguousPrefixLookupError(
1365 1366 id, self.indexfile, _(b'ambiguous identifier')
1366 1367 )
1367 1368 # fall through to slow path that filters hidden revisions
1368 1369 except (AttributeError, ValueError):
1369 1370 # we are pure python, or key was too short to search radix tree
1370 1371 pass
1371 1372
1372 1373 if id in self._pcache:
1373 1374 return self._pcache[id]
1374 1375
1375 1376 if len(id) <= 40:
1376 1377 try:
1377 1378 # hex(node)[:...]
1378 1379 l = len(id) // 2 # grab an even number of digits
1379 1380 prefix = bin(id[: l * 2])
1380 1381 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1381 1382 nl = [
1382 1383 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1383 1384 ]
1384 1385 if self.nodeconstants.nullhex.startswith(id):
1385 1386 nl.append(self.nullid)
1386 1387 if len(nl) > 0:
1387 1388 if len(nl) == 1 and not maybewdir:
1388 1389 self._pcache[id] = nl[0]
1389 1390 return nl[0]
1390 1391 raise error.AmbiguousPrefixLookupError(
1391 1392 id, self.indexfile, _(b'ambiguous identifier')
1392 1393 )
1393 1394 if maybewdir:
1394 1395 raise error.WdirUnsupported
1395 1396 return None
1396 1397 except TypeError:
1397 1398 pass
1398 1399
1399 1400 def lookup(self, id):
1400 1401 """locate a node based on:
1401 1402 - revision number or str(revision number)
1402 1403 - nodeid or subset of hex nodeid
1403 1404 """
1404 1405 n = self._match(id)
1405 1406 if n is not None:
1406 1407 return n
1407 1408 n = self._partialmatch(id)
1408 1409 if n:
1409 1410 return n
1410 1411
1411 1412 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1412 1413
1413 1414 def shortest(self, node, minlength=1):
1414 1415 """Find the shortest unambiguous prefix that matches node."""
1415 1416
1416 1417 def isvalid(prefix):
1417 1418 try:
1418 1419 matchednode = self._partialmatch(prefix)
1419 1420 except error.AmbiguousPrefixLookupError:
1420 1421 return False
1421 1422 except error.WdirUnsupported:
1422 1423 # single 'ff...' match
1423 1424 return True
1424 1425 if matchednode is None:
1425 1426 raise error.LookupError(node, self.indexfile, _(b'no node'))
1426 1427 return True
1427 1428
1428 1429 def maybewdir(prefix):
1429 1430 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1430 1431
1431 1432 hexnode = hex(node)
1432 1433
1433 1434 def disambiguate(hexnode, minlength):
1434 1435 """Disambiguate against wdirid."""
1435 1436 for length in range(minlength, len(hexnode) + 1):
1436 1437 prefix = hexnode[:length]
1437 1438 if not maybewdir(prefix):
1438 1439 return prefix
1439 1440
1440 1441 if not getattr(self, 'filteredrevs', None):
1441 1442 try:
1442 1443 length = max(self.index.shortest(node), minlength)
1443 1444 return disambiguate(hexnode, length)
1444 1445 except error.RevlogError:
1445 1446 if node != self.nodeconstants.wdirid:
1446 1447 raise error.LookupError(node, self.indexfile, _(b'no node'))
1447 1448 except AttributeError:
1448 1449 # Fall through to pure code
1449 1450 pass
1450 1451
1451 1452 if node == self.nodeconstants.wdirid:
1452 1453 for length in range(minlength, len(hexnode) + 1):
1453 1454 prefix = hexnode[:length]
1454 1455 if isvalid(prefix):
1455 1456 return prefix
1456 1457
1457 1458 for length in range(minlength, len(hexnode) + 1):
1458 1459 prefix = hexnode[:length]
1459 1460 if isvalid(prefix):
1460 1461 return disambiguate(hexnode, length)
1461 1462
1462 1463 def cmp(self, node, text):
1463 1464 """compare text with a given file revision
1464 1465
1465 1466 returns True if text is different than what is stored.
1466 1467 """
1467 1468 p1, p2 = self.parents(node)
1468 1469 return storageutil.hashrevisionsha1(text, p1, p2) != node
1469 1470
1470 1471 def _cachesegment(self, offset, data):
1471 1472 """Add a segment to the revlog cache.
1472 1473
1473 1474 Accepts an absolute offset and the data that is at that location.
1474 1475 """
1475 1476 o, d = self._chunkcache
1476 1477 # try to add to existing cache
1477 1478 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1478 1479 self._chunkcache = o, d + data
1479 1480 else:
1480 1481 self._chunkcache = offset, data
1481 1482
1482 1483 def _readsegment(self, offset, length, df=None):
1483 1484 """Load a segment of raw data from the revlog.
1484 1485
1485 1486 Accepts an absolute offset, length to read, and an optional existing
1486 1487 file handle to read from.
1487 1488
1488 1489 If an existing file handle is passed, it will be seeked and the
1489 1490 original seek position will NOT be restored.
1490 1491
1491 1492 Returns a str or buffer of raw byte data.
1492 1493
1493 1494 Raises if the requested number of bytes could not be read.
1494 1495 """
1495 1496 # Cache data both forward and backward around the requested
1496 1497 # data, in a fixed size window. This helps speed up operations
1497 1498 # involving reading the revlog backwards.
1498 1499 cachesize = self._chunkcachesize
1499 1500 realoffset = offset & ~(cachesize - 1)
1500 1501 reallength = (
1501 1502 (offset + length + cachesize) & ~(cachesize - 1)
1502 1503 ) - realoffset
1503 1504 with self._datareadfp(df) as df:
1504 1505 df.seek(realoffset)
1505 1506 d = df.read(reallength)
1506 1507
1507 1508 self._cachesegment(realoffset, d)
1508 1509 if offset != realoffset or reallength != length:
1509 1510 startoffset = offset - realoffset
1510 1511 if len(d) - startoffset < length:
1511 1512 raise error.RevlogError(
1512 1513 _(
1513 1514 b'partial read of revlog %s; expected %d bytes from '
1514 1515 b'offset %d, got %d'
1515 1516 )
1516 1517 % (
1517 1518 self.indexfile if self._inline else self.datafile,
1518 1519 length,
1519 1520 realoffset,
1520 1521 len(d) - startoffset,
1521 1522 )
1522 1523 )
1523 1524
1524 1525 return util.buffer(d, startoffset, length)
1525 1526
1526 1527 if len(d) < length:
1527 1528 raise error.RevlogError(
1528 1529 _(
1529 1530 b'partial read of revlog %s; expected %d bytes from offset '
1530 1531 b'%d, got %d'
1531 1532 )
1532 1533 % (
1533 1534 self.indexfile if self._inline else self.datafile,
1534 1535 length,
1535 1536 offset,
1536 1537 len(d),
1537 1538 )
1538 1539 )
1539 1540
1540 1541 return d
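# The cache-window rounding used above, shown on concrete numbers as a small
# sketch: the requested byte range is widened so both ends land on multiples
# of the cache size (assumed to be a power of two).
cachesize = 65536
offset, length = 60000, 10000  # a request that crosses a window boundary
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert realoffset == 0              # rounded down to the previous boundary
assert reallength == 2 * cachesize  # widened to cover both windows plus read-ahead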
1541 1542
1542 1543 def _getsegment(self, offset, length, df=None):
1543 1544 """Obtain a segment of raw data from the revlog.
1544 1545
1545 1546 Accepts an absolute offset, length of bytes to obtain, and an
1546 1547 optional file handle to the already-opened revlog. If the file
1547 1548 handle is used, its original seek position will not be preserved.
1548 1549
1549 1550 Requests for data may be returned from a cache.
1550 1551
1551 1552 Returns a str or a buffer instance of raw byte data.
1552 1553 """
1553 1554 o, d = self._chunkcache
1554 1555 l = len(d)
1555 1556
1556 1557 # is it in the cache?
1557 1558 cachestart = offset - o
1558 1559 cacheend = cachestart + length
1559 1560 if cachestart >= 0 and cacheend <= l:
1560 1561 if cachestart == 0 and cacheend == l:
1561 1562 return d # avoid a copy
1562 1563 return util.buffer(d, cachestart, cacheend - cachestart)
1563 1564
1564 1565 return self._readsegment(offset, length, df=df)
1565 1566
1566 1567 def _getsegmentforrevs(self, startrev, endrev, df=None):
1567 1568 """Obtain a segment of raw data corresponding to a range of revisions.
1568 1569
1569 1570 Accepts the start and end revisions and an optional already-open
1570 1571 file handle to be used for reading. If the file handle is used, its
1571 1572 seek position will not be preserved.
1572 1573
1573 1574 Requests for data may be satisfied by a cache.
1574 1575
1575 1576 Returns a 2-tuple of (offset, data) for the requested range of
1576 1577 revisions. Offset is the integer offset from the beginning of the
1577 1578 revlog and data is a str or buffer of the raw byte data.
1578 1579
1579 1580 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1580 1581 to determine where each revision's data begins and ends.
1581 1582 """
1582 1583 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1583 1584 # (functions are expensive).
1584 1585 index = self.index
1585 1586 istart = index[startrev]
1586 1587 start = int(istart[0] >> 16)
1587 1588 if startrev == endrev:
1588 1589 end = start + istart[1]
1589 1590 else:
1590 1591 iend = index[endrev]
1591 1592 end = int(iend[0] >> 16) + iend[1]
1592 1593
1593 1594 if self._inline:
1594 1595 start += (startrev + 1) * self.index.entry_size
1595 1596 end += (endrev + 1) * self.index.entry_size
1596 1597 length = end - start
1597 1598
1598 1599 return start, self._getsegment(start, length, df=df)
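# A small sketch of why the inline case above shifts offsets: in an inline
# revlog the .i file interleaves each index entry with its data chunk, so the
# physical position of a revision's data is its logical data offset plus one
# entry size for every entry up to and including that revision (hence the
# "rev + 1"). The entry size below is only an assumption for the example.
entry_size = 64
logical_start = 500   # what self.start(rev) would return
rev = 3
physical_start = logical_start + (rev + 1) * entry_size
assert physical_start == 756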
1599 1600
1600 1601 def _chunk(self, rev, df=None):
1601 1602 """Obtain a single decompressed chunk for a revision.
1602 1603
1603 1604 Accepts an integer revision and an optional already-open file handle
1604 1605 to be used for reading. If used, the seek position of the file will not
1605 1606 be preserved.
1606 1607
1607 1608 Returns a str holding uncompressed data for the requested revision.
1608 1609 """
1609 1610 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1610 1611
1611 1612 def _chunks(self, revs, df=None, targetsize=None):
1612 1613 """Obtain decompressed chunks for the specified revisions.
1613 1614
1614 1615 Accepts an iterable of numeric revisions that are assumed to be in
1615 1616 ascending order. Also accepts an optional already-open file handle
1616 1617 to be used for reading. If used, the seek position of the file will
1617 1618 not be preserved.
1618 1619
1619 1620 This function is similar to calling ``self._chunk()`` multiple times,
1620 1621 but is faster.
1621 1622
1622 1623 Returns a list with decompressed data for each requested revision.
1623 1624 """
1624 1625 if not revs:
1625 1626 return []
1626 1627 start = self.start
1627 1628 length = self.length
1628 1629 inline = self._inline
1629 1630 iosize = self.index.entry_size
1630 1631 buffer = util.buffer
1631 1632
1632 1633 l = []
1633 1634 ladd = l.append
1634 1635
1635 1636 if not self._withsparseread:
1636 1637 slicedchunks = (revs,)
1637 1638 else:
1638 1639 slicedchunks = deltautil.slicechunk(
1639 1640 self, revs, targetsize=targetsize
1640 1641 )
1641 1642
1642 1643 for revschunk in slicedchunks:
1643 1644 firstrev = revschunk[0]
1644 1645 # Skip trailing revisions with empty diff
1645 1646 for lastrev in revschunk[::-1]:
1646 1647 if length(lastrev) != 0:
1647 1648 break
1648 1649
1649 1650 try:
1650 1651 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1651 1652 except OverflowError:
1652 1653 # issue4215 - we can't cache a run of chunks greater than
1653 1654 # 2G on Windows
1654 1655 return [self._chunk(rev, df=df) for rev in revschunk]
1655 1656
1656 1657 decomp = self.decompress
1657 1658 for rev in revschunk:
1658 1659 chunkstart = start(rev)
1659 1660 if inline:
1660 1661 chunkstart += (rev + 1) * iosize
1661 1662 chunklength = length(rev)
1662 1663 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1663 1664
1664 1665 return l
1665 1666
1666 1667 def _chunkclear(self):
1667 1668 """Clear the raw chunk cache."""
1668 1669 self._chunkcache = (0, b'')
1669 1670
1670 1671 def deltaparent(self, rev):
1671 1672 """return deltaparent of the given revision"""
1672 1673 base = self.index[rev][3]
1673 1674 if base == rev:
1674 1675 return nullrev
1675 1676 elif self._generaldelta:
1676 1677 return base
1677 1678 else:
1678 1679 return rev - 1
1679 1680
1680 1681 def issnapshot(self, rev):
1681 1682 """tells whether rev is a snapshot"""
1682 1683 if not self._sparserevlog:
1683 1684 return self.deltaparent(rev) == nullrev
1684 1685 elif util.safehasattr(self.index, b'issnapshot'):
1685 1686 # directly assign the method to cache the testing and access
1686 1687 self.issnapshot = self.index.issnapshot
1687 1688 return self.issnapshot(rev)
1688 1689 if rev == nullrev:
1689 1690 return True
1690 1691 entry = self.index[rev]
1691 1692 base = entry[3]
1692 1693 if base == rev:
1693 1694 return True
1694 1695 if base == nullrev:
1695 1696 return True
1696 1697 p1 = entry[5]
1697 1698 p2 = entry[6]
1698 1699 if base == p1 or base == p2:
1699 1700 return False
1700 1701 return self.issnapshot(base)
1701 1702
1702 1703 def snapshotdepth(self, rev):
1703 1704 """number of snapshot in the chain before this one"""
1704 1705 if not self.issnapshot(rev):
1705 1706 raise error.ProgrammingError(b'revision %d not a snapshot')
1706 1707 return len(self._deltachain(rev)[0]) - 1
1707 1708
1708 1709 def revdiff(self, rev1, rev2):
1709 1710 """return or calculate a delta between two revisions
1710 1711
1711 1712 The delta calculated is in binary form and is intended to be written to
1712 1713 revlog data directly. So this function needs raw revision data.
1713 1714 """
1714 1715 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1715 1716 return bytes(self._chunk(rev2))
1716 1717
1717 1718 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1718 1719
1719 1720 def _processflags(self, text, flags, operation, raw=False):
1720 1721 """deprecated entry point to access flag processors"""
1721 1722 msg = b'_processflag(...) use the specialized variant'
1722 1723 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1723 1724 if raw:
1724 1725 return text, flagutil.processflagsraw(self, text, flags)
1725 1726 elif operation == b'read':
1726 1727 return flagutil.processflagsread(self, text, flags)
1727 1728 else: # write operation
1728 1729 return flagutil.processflagswrite(self, text, flags)
1729 1730
1730 1731 def revision(self, nodeorrev, _df=None, raw=False):
1731 1732 """return an uncompressed revision of a given node or revision
1732 1733 number.
1733 1734
1734 1735 _df - an existing file handle to read from. (internal-only)
1735 1736 raw - an optional argument specifying if the revision data is to be
1736 1737 treated as raw data when applying flag transforms. 'raw' should be set
1737 1738 to True when generating changegroups or in debug commands.
1738 1739 """
1739 1740 if raw:
1740 1741 msg = (
1741 1742 b'revlog.revision(..., raw=True) is deprecated, '
1742 1743 b'use revlog.rawdata(...)'
1743 1744 )
1744 1745 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1745 1746 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1746 1747
1747 1748 def sidedata(self, nodeorrev, _df=None):
1748 1749 """a map of extra data related to the changeset but not part of the hash
1749 1750
1750 1751 This function currently returns a dictionary. However, a more advanced
1751 1752 mapping object will likely be used in the future for more
1752 1753 efficient/lazy code.
1753 1754 """
1754 1755 return self._revisiondata(nodeorrev, _df)[1]
1755 1756
1756 1757 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1757 1758 # deal with <nodeorrev> argument type
1758 1759 if isinstance(nodeorrev, int):
1759 1760 rev = nodeorrev
1760 1761 node = self.node(rev)
1761 1762 else:
1762 1763 node = nodeorrev
1763 1764 rev = None
1764 1765
1765 1766 # fast path the special `nullid` rev
1766 1767 if node == self.nullid:
1767 1768 return b"", {}
1768 1769
1769 1770 # ``rawtext`` is the text as stored inside the revlog. Might be the
1770 1771 # revision or might need to be processed to retrieve the revision.
1771 1772 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1772 1773
1773 1774 if self.version & 0xFFFF == REVLOGV2:
1774 1775 if rev is None:
1775 1776 rev = self.rev(node)
1776 1777 sidedata = self._sidedata(rev)
1777 1778 else:
1778 1779 sidedata = {}
1779 1780
1780 1781 if raw and validated:
1781 1782 # if we don't want to process the raw text and that raw
1782 1783 # text is cached, we can exit early.
1783 1784 return rawtext, sidedata
1784 1785 if rev is None:
1785 1786 rev = self.rev(node)
1786 1787 # the revlog's flag for this revision
1787 1788 # (usually alter its state or content)
1788 1789 flags = self.flags(rev)
1789 1790
1790 1791 if validated and flags == REVIDX_DEFAULT_FLAGS:
1791 1792 # no extra flags set, no flag processor runs, text = rawtext
1792 1793 return rawtext, sidedata
1793 1794
1794 1795 if raw:
1795 1796 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1796 1797 text = rawtext
1797 1798 else:
1798 1799 r = flagutil.processflagsread(self, rawtext, flags)
1799 1800 text, validatehash = r
1800 1801 if validatehash:
1801 1802 self.checkhash(text, node, rev=rev)
1802 1803 if not validated:
1803 1804 self._revisioncache = (node, rev, rawtext)
1804 1805
1805 1806 return text, sidedata
1806 1807
1807 1808 def _rawtext(self, node, rev, _df=None):
1808 1809 """return the possibly unvalidated rawtext for a revision
1809 1810
1810 1811 returns (rev, rawtext, validated)
1811 1812 """
1812 1813
1813 1814 # revision in the cache (could be useful to apply delta)
1814 1815 cachedrev = None
1815 1816 # An intermediate text to apply deltas to
1816 1817 basetext = None
1817 1818
1818 1819 # Check if we have the entry in cache
1819 1820 # The cache entry looks like (node, rev, rawtext)
1820 1821 if self._revisioncache:
1821 1822 if self._revisioncache[0] == node:
1822 1823 return (rev, self._revisioncache[2], True)
1823 1824 cachedrev = self._revisioncache[1]
1824 1825
1825 1826 if rev is None:
1826 1827 rev = self.rev(node)
1827 1828
1828 1829 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1829 1830 if stopped:
1830 1831 basetext = self._revisioncache[2]
1831 1832
1832 1833 # drop cache to save memory, the caller is expected to
1833 1834 # update self._revisioncache after validating the text
1834 1835 self._revisioncache = None
1835 1836
1836 1837 targetsize = None
1837 1838 rawsize = self.index[rev][2]
1838 1839 if 0 <= rawsize:
1839 1840 targetsize = 4 * rawsize
1840 1841
1841 1842 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1842 1843 if basetext is None:
1843 1844 basetext = bytes(bins[0])
1844 1845 bins = bins[1:]
1845 1846
1846 1847 rawtext = mdiff.patches(basetext, bins)
1847 1848 del basetext # let us have a chance to free memory early
1848 1849 return (rev, rawtext, False)
1849 1850
1850 1851 def _sidedata(self, rev):
1851 1852 """Return the sidedata for a given revision number."""
1852 1853 index_entry = self.index[rev]
1853 1854 sidedata_offset = index_entry[8]
1854 1855 sidedata_size = index_entry[9]
1855 1856
1856 1857 if self._inline:
1857 1858 sidedata_offset += self.index.entry_size * (1 + rev)
1858 1859 if sidedata_size == 0:
1859 1860 return {}
1860 1861
1861 1862 segment = self._getsegment(sidedata_offset, sidedata_size)
1862 1863 sidedata = sidedatautil.deserialize_sidedata(segment)
1863 1864 return sidedata
1864 1865
1865 1866 def rawdata(self, nodeorrev, _df=None):
1866 1867 """return an uncompressed raw data of a given node or revision number.
1867 1868
1868 1869 _df - an existing file handle to read from. (internal-only)
1869 1870 """
1870 1871 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1871 1872
1872 1873 def hash(self, text, p1, p2):
1873 1874 """Compute a node hash.
1874 1875
1875 1876 Available as a function so that subclasses can replace the hash
1876 1877 as needed.
1877 1878 """
1878 1879 return storageutil.hashrevisionsha1(text, p1, p2)
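# A minimal sketch of what storageutil.hashrevisionsha1 is understood to
# compute (an assumption, not a reimplementation): SHA-1 over the two parent
# nodes in sorted order followed by the revision text. This is the value
# checkhash() below compares against the stored node.
import hashlib

def sketch_hashrevision(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

nullnode = b'\0' * 20
node = sketch_hashrevision(b'file content\n', nullnode, nullnode)
assert len(node) == 20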
1879 1880
1880 1881 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1881 1882 """Check node hash integrity.
1882 1883
1883 1884 Available as a function so that subclasses can extend hash mismatch
1884 1885 behaviors as needed.
1885 1886 """
1886 1887 try:
1887 1888 if p1 is None and p2 is None:
1888 1889 p1, p2 = self.parents(node)
1889 1890 if node != self.hash(text, p1, p2):
1890 1891 # Clear the revision cache on hash failure. The revision cache
1891 1892 # only stores the raw revision and clearing the cache does have
1892 1893 # the side-effect that we won't have a cache hit when the raw
1893 1894 # revision data is accessed. But this case should be rare and
1894 1895 # it is extra work to teach the cache about the hash
1895 1896 # verification state.
1896 1897 if self._revisioncache and self._revisioncache[0] == node:
1897 1898 self._revisioncache = None
1898 1899
1899 1900 revornode = rev
1900 1901 if revornode is None:
1901 1902 revornode = templatefilters.short(hex(node))
1902 1903 raise error.RevlogError(
1903 1904 _(b"integrity check failed on %s:%s")
1904 1905 % (self.indexfile, pycompat.bytestr(revornode))
1905 1906 )
1906 1907 except error.RevlogError:
1907 1908 if self._censorable and storageutil.iscensoredtext(text):
1908 1909 raise error.CensoredNodeError(self.indexfile, node, text)
1909 1910 raise
1910 1911
1911 1912 def _enforceinlinesize(self, tr, fp=None):
1912 1913 """Check if the revlog is too big for inline and convert if so.
1913 1914
1914 1915 This should be called after revisions are added to the revlog. If the
1915 1916 revlog has grown too large to be an inline revlog, it will convert it
1916 1917 to use multiple index and data files.
1917 1918 """
1918 1919 tiprev = len(self) - 1
1919 1920 if (
1920 1921 not self._inline
1921 1922 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1922 1923 ):
1923 1924 return
1924 1925
1925 1926 troffset = tr.findoffset(self.indexfile)
1926 1927 if troffset is None:
1927 1928 raise error.RevlogError(
1928 1929 _(b"%s not found in the transaction") % self.indexfile
1929 1930 )
1930 1931 trindex = 0
1931 1932 tr.add(self.datafile, 0)
1932 1933
1933 1934 if fp:
1934 1935 fp.flush()
1935 1936 fp.close()
1936 1937 # We can't use the cached file handle after close(). So prevent
1937 1938 # its usage.
1938 1939 self._writinghandles = None
1939 1940
1940 1941 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1941 1942 for r in self:
1942 1943 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1943 1944 if troffset <= self.start(r):
1944 1945 trindex = r
1945 1946
1946 1947 with self._indexfp(b'w') as fp:
1947 1948 self.version &= ~FLAG_INLINE_DATA
1948 1949 self._inline = False
1949 1950 for i in self:
1950 1951 e = self.index.entry_binary(i)
1951 1952 if i == 0:
1952 1953 header = self.index.pack_header(self.version)
1953 1954 e = header + e
1954 1955 fp.write(e)
1955 1956
1956 1957 # the temp file replaces the real index when we exit the context
1957 1958 # manager
1958 1959
1959 1960 tr.replace(self.indexfile, trindex * self.index.entry_size)
1960 1961 nodemaputil.setup_persistent_nodemap(tr, self)
1961 1962 self._chunkclear()
1962 1963
1963 1964 def _nodeduplicatecallback(self, transaction, node):
1964 1965 """called when trying to add a node already stored."""
1965 1966
1966 1967 def addrevision(
1967 1968 self,
1968 1969 text,
1969 1970 transaction,
1970 1971 link,
1971 1972 p1,
1972 1973 p2,
1973 1974 cachedelta=None,
1974 1975 node=None,
1975 1976 flags=REVIDX_DEFAULT_FLAGS,
1976 1977 deltacomputer=None,
1977 1978 sidedata=None,
1978 1979 ):
1979 1980 """add a revision to the log
1980 1981
1981 1982 text - the revision data to add
1982 1983 transaction - the transaction object used for rollback
1983 1984 link - the linkrev data to add
1984 1985 p1, p2 - the parent nodeids of the revision
1985 1986 cachedelta - an optional precomputed delta
1986 1987 node - nodeid of revision; typically node is not specified, and it is
1987 1988 computed by default as hash(text, p1, p2), however subclasses might
1988 1989 use a different hashing method (and override checkhash() in such a case)
1989 1990 flags - the known flags to set on the revision
1990 1991 deltacomputer - an optional deltacomputer instance shared between
1991 1992 multiple calls
1992 1993 """
1993 1994 if link == nullrev:
1994 1995 raise error.RevlogError(
1995 1996 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1996 1997 )
1997 1998
1998 1999 if sidedata is None:
1999 2000 sidedata = {}
2000 elif not self.hassidedata:
2001 elif sidedata and not self.hassidedata:
2001 2002 raise error.ProgrammingError(
2002 2003 _(b"trying to add sidedata to a revlog who don't support them")
2003 2004 )
2004 2005
2005 2006 if flags:
2006 2007 node = node or self.hash(text, p1, p2)
2007 2008
2008 2009 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2009 2010
2010 2011 # If the flag processor modifies the revision data, ignore any provided
2011 2012 # cachedelta.
2012 2013 if rawtext != text:
2013 2014 cachedelta = None
2014 2015
2015 2016 if len(rawtext) > _maxentrysize:
2016 2017 raise error.RevlogError(
2017 2018 _(
2018 2019 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2019 2020 )
2020 2021 % (self.indexfile, len(rawtext))
2021 2022 )
2022 2023
2023 2024 node = node or self.hash(rawtext, p1, p2)
2024 2025 rev = self.index.get_rev(node)
2025 2026 if rev is not None:
2026 2027 return rev
2027 2028
2028 2029 if validatehash:
2029 2030 self.checkhash(rawtext, node, p1=p1, p2=p2)
2030 2031
2031 2032 return self.addrawrevision(
2032 2033 rawtext,
2033 2034 transaction,
2034 2035 link,
2035 2036 p1,
2036 2037 p2,
2037 2038 node,
2038 2039 flags,
2039 2040 cachedelta=cachedelta,
2040 2041 deltacomputer=deltacomputer,
2041 2042 sidedata=sidedata,
2042 2043 )
2043 2044
2044 2045 def addrawrevision(
2045 2046 self,
2046 2047 rawtext,
2047 2048 transaction,
2048 2049 link,
2049 2050 p1,
2050 2051 p2,
2051 2052 node,
2052 2053 flags,
2053 2054 cachedelta=None,
2054 2055 deltacomputer=None,
2055 2056 sidedata=None,
2056 2057 ):
2057 2058 """add a raw revision with known flags, node and parents
2058 2059 useful when reusing a revision not stored in this revlog (ex: received
2059 2060 over wire, or read from an external bundle).
2060 2061 """
2061 2062 dfh = None
2062 2063 if not self._inline:
2063 2064 dfh = self._datafp(b"a+")
2064 2065 ifh = self._indexfp(b"a+")
2065 2066 try:
2066 2067 return self._addrevision(
2067 2068 node,
2068 2069 rawtext,
2069 2070 transaction,
2070 2071 link,
2071 2072 p1,
2072 2073 p2,
2073 2074 flags,
2074 2075 cachedelta,
2075 2076 ifh,
2076 2077 dfh,
2077 2078 deltacomputer=deltacomputer,
2078 2079 sidedata=sidedata,
2079 2080 )
2080 2081 finally:
2081 2082 if dfh:
2082 2083 dfh.close()
2083 2084 ifh.close()
2084 2085
2085 2086 def compress(self, data):
2086 2087 """Generate a possibly-compressed representation of data."""
2087 2088 if not data:
2088 2089 return b'', data
2089 2090
2090 2091 compressed = self._compressor.compress(data)
2091 2092
2092 2093 if compressed:
2093 2094 # The revlog compressor added the header in the returned data.
2094 2095 return b'', compressed
2095 2096
2096 2097 if data[0:1] == b'\0':
2097 2098 return b'', data
2098 2099 return b'u', data
2099 2100
2100 2101 def decompress(self, data):
2101 2102 """Decompress a revlog chunk.
2102 2103
2103 2104 The chunk is expected to begin with a header identifying the
2104 2105 format type so it can be routed to an appropriate decompressor.
2105 2106 """
2106 2107 if not data:
2107 2108 return data
2108 2109
2109 2110 # Revlogs are read much more frequently than they are written and many
2110 2111 # chunks only take microseconds to decompress, so performance is
2111 2112 # important here.
2112 2113 #
2113 2114 # We can make a few assumptions about revlogs:
2114 2115 #
2115 2116 # 1) the majority of chunks will be compressed (as opposed to inline
2116 2117 # raw data).
2117 2118 # 2) decompressing *any* data will likely be at least 10x slower than
2118 2119 # returning raw inline data.
2119 2120 # 3) we want to prioritize common and officially supported compression
2120 2121 # engines
2121 2122 #
2122 2123 # It follows that we want to optimize for "decompress compressed data
2123 2124 # when encoded with common and officially supported compression engines"
2124 2125 # case over "raw data" and "data encoded by less common or non-official
2125 2126 # compression engines." That is why we have the inline lookup first
2126 2127 # followed by the compengines lookup.
2127 2128 #
2128 2129 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2129 2130 # compressed chunks. And this matters for changelog and manifest reads.
2130 2131 t = data[0:1]
2131 2132
2132 2133 if t == b'x':
2133 2134 try:
2134 2135 return _zlibdecompress(data)
2135 2136 except zlib.error as e:
2136 2137 raise error.RevlogError(
2137 2138 _(b'revlog decompress error: %s')
2138 2139 % stringutil.forcebytestr(e)
2139 2140 )
2140 2141 # '\0' is more common than 'u' so it goes first.
2141 2142 elif t == b'\0':
2142 2143 return data
2143 2144 elif t == b'u':
2144 2145 return util.buffer(data, 1)
2145 2146
2146 2147 try:
2147 2148 compressor = self._decompressors[t]
2148 2149 except KeyError:
2149 2150 try:
2150 2151 engine = util.compengines.forrevlogheader(t)
2151 2152 compressor = engine.revlogcompressor(self._compengineopts)
2152 2153 self._decompressors[t] = compressor
2153 2154 except KeyError:
2154 2155 raise error.RevlogError(
2155 2156 _(b'unknown compression type %s') % binascii.hexlify(t)
2156 2157 )
2157 2158
2158 2159 return compressor.decompress(data)
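# A minimal sketch of the header dispatch described above, restricted to the
# three built-in cases; real revlogs can route other header bytes to extra
# compression engines (e.g. zstd) as done in the except branch above.
import zlib

def sketch_decompress(chunk):
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'\0':  # raw data that happens to start with NUL, stored as-is
        return chunk
    if t == b'u':   # stored uncompressed behind a one-byte marker
        return chunk[1:]
    if t == b'x':   # zlib streams (with default settings) start with 'x'
        return zlib.decompress(chunk)
    raise ValueError('unknown compression header %r' % t)

assert sketch_decompress(b'uhello') == b'hello'
assert sketch_decompress(zlib.compress(b'hello')) == b'hello'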
2159 2160
2160 2161 def _addrevision(
2161 2162 self,
2162 2163 node,
2163 2164 rawtext,
2164 2165 transaction,
2165 2166 link,
2166 2167 p1,
2167 2168 p2,
2168 2169 flags,
2169 2170 cachedelta,
2170 2171 ifh,
2171 2172 dfh,
2172 2173 alwayscache=False,
2173 2174 deltacomputer=None,
2174 2175 sidedata=None,
2175 2176 ):
2176 2177 """internal function to add revisions to the log
2177 2178
2178 2179 see addrevision for argument descriptions.
2179 2180
2180 2181 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2181 2182
2182 2183 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2183 2184 be used.
2184 2185
2185 2186 invariants:
2186 2187 - rawtext is optional (can be None); if not set, cachedelta must be set.
2187 2188 if both are set, they must correspond to each other.
2188 2189 """
2189 2190 if node == self.nullid:
2190 2191 raise error.RevlogError(
2191 2192 _(b"%s: attempt to add null revision") % self.indexfile
2192 2193 )
2193 2194 if (
2194 2195 node == self.nodeconstants.wdirid
2195 2196 or node in self.nodeconstants.wdirfilenodeids
2196 2197 ):
2197 2198 raise error.RevlogError(
2198 2199 _(b"%s: attempt to add wdir revision") % self.indexfile
2199 2200 )
2200 2201
2201 2202 if self._inline:
2202 2203 fh = ifh
2203 2204 else:
2204 2205 fh = dfh
2205 2206
2206 2207 btext = [rawtext]
2207 2208
2208 2209 curr = len(self)
2209 2210 prev = curr - 1
2210 2211
2211 2212 offset = self._get_data_offset(prev)
2212 2213
2213 2214 if self._concurrencychecker:
2214 2215 if self._inline:
2215 2216 # offset is "as if" it were in the .d file, so we need to add on
2216 2217 # the size of the entry metadata.
2217 2218 self._concurrencychecker(
2218 2219 ifh, self.indexfile, offset + curr * self.index.entry_size
2219 2220 )
2220 2221 else:
2221 2222 # Entries in the .i are a consistent size.
2222 2223 self._concurrencychecker(
2223 2224 ifh, self.indexfile, curr * self.index.entry_size
2224 2225 )
2225 2226 self._concurrencychecker(dfh, self.datafile, offset)
2226 2227
2227 2228 p1r, p2r = self.rev(p1), self.rev(p2)
2228 2229
2229 2230 # full versions are inserted when the needed deltas
2230 2231 # become comparable to the uncompressed text
2231 2232 if rawtext is None:
2232 2233 # need rawtext size, before changed by flag processors, which is
2233 2234 # the non-raw size. use revlog explicitly to avoid filelog's extra
2234 2235 # logic that might remove metadata size.
2235 2236 textlen = mdiff.patchedsize(
2236 2237 revlog.size(self, cachedelta[0]), cachedelta[1]
2237 2238 )
2238 2239 else:
2239 2240 textlen = len(rawtext)
2240 2241
2241 2242 if deltacomputer is None:
2242 2243 deltacomputer = deltautil.deltacomputer(self)
2243 2244
2244 2245 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2245 2246
2246 2247 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2247 2248
2248 2249 if sidedata and self.version & 0xFFFF == REVLOGV2:
2249 2250 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2250 2251 sidedata_offset = offset + deltainfo.deltalen
2251 2252 else:
2252 2253 serialized_sidedata = b""
2253 2254 # Don't store the offset if the sidedata is empty; that way
2254 2255 # we can easily detect empty sidedata, and it will be no different
2255 2256 # from sidedata we add manually.
2256 2257 sidedata_offset = 0
2257 2258
2258 2259 e = (
2259 2260 offset_type(offset, flags),
2260 2261 deltainfo.deltalen,
2261 2262 textlen,
2262 2263 deltainfo.base,
2263 2264 link,
2264 2265 p1r,
2265 2266 p2r,
2266 2267 node,
2267 2268 sidedata_offset,
2268 2269 len(serialized_sidedata),
2269 2270 )
2270 2271
2271 2272 if self.version & 0xFFFF != REVLOGV2:
2272 2273 e = e[:8]
2273 2274
2274 2275 self.index.append(e)
2275 2276 entry = self.index.entry_binary(curr)
2276 2277 if curr == 0:
2277 2278 header = self.index.pack_header(self.version)
2278 2279 entry = header + entry
2279 2280 self._writeentry(
2280 2281 transaction,
2281 2282 ifh,
2282 2283 dfh,
2283 2284 entry,
2284 2285 deltainfo.data,
2285 2286 link,
2286 2287 offset,
2287 2288 serialized_sidedata,
2288 2289 )
2289 2290
2290 2291 rawtext = btext[0]
2291 2292
2292 2293 if alwayscache and rawtext is None:
2293 2294 rawtext = deltacomputer.buildtext(revinfo, fh)
2294 2295
2295 2296 if type(rawtext) == bytes: # only accept immutable objects
2296 2297 self._revisioncache = (node, curr, rawtext)
2297 2298 self._chainbasecache[curr] = deltainfo.chainbase
2298 2299 return curr
2299 2300
2300 2301 def _get_data_offset(self, prev):
2301 2302 """Returns the current offset in the (in-transaction) data file.
2302 2303 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2303 2304 file to store that information: since sidedata can be rewritten to the
2304 2305 end of the data file within a transaction, you can have cases where, for
2305 2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2306 2307 to `n - 1`'s sidedata being written after `n`'s data.
2307 2308
2308 2309 TODO cache this in a docket file before getting out of experimental."""
2309 2310 if self.version & 0xFFFF != REVLOGV2:
2310 2311 return self.end(prev)
2311 2312
2312 2313 offset = 0
2313 2314 for rev, entry in enumerate(self.index):
2314 2315 sidedata_end = entry[8] + entry[9]
2315 2316 # Sidedata for a previous rev has potentially been written after
2316 2317 # this rev's end, so take the max.
2317 2318 offset = max(self.end(rev), offset, sidedata_end)
2318 2319 return offset
2319 2320
2320 2321 def _writeentry(
2321 2322 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2322 2323 ):
2323 2324 # Files opened in a+ mode have inconsistent behavior on various
2324 2325 # platforms. Windows requires that a file positioning call be made
2325 2326 # when the file handle transitions between reads and writes. See
2326 2327 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2327 2328 # platforms, Python or the platform itself can be buggy. Some versions
2328 2329 # of Solaris have been observed to not append at the end of the file
2329 2330 # if the file was seeked to before the end. See issue4943 for more.
2330 2331 #
2331 2332 # We work around this issue by inserting a seek() before writing.
2332 2333 # Note: This is likely not necessary on Python 3. However, because
2333 2334 # the file handle is reused for reads and may be seeked there, we need
2334 2335 # to be careful before changing this.
2335 2336 ifh.seek(0, os.SEEK_END)
2336 2337 if dfh:
2337 2338 dfh.seek(0, os.SEEK_END)
2338 2339
2339 2340 curr = len(self) - 1
2340 2341 if not self._inline:
2341 2342 transaction.add(self.datafile, offset)
2342 2343 transaction.add(self.indexfile, curr * len(entry))
2343 2344 if data[0]:
2344 2345 dfh.write(data[0])
2345 2346 dfh.write(data[1])
2346 2347 if sidedata:
2347 2348 dfh.write(sidedata)
2348 2349 ifh.write(entry)
2349 2350 else:
2350 2351 offset += curr * self.index.entry_size
2351 2352 transaction.add(self.indexfile, offset)
2352 2353 ifh.write(entry)
2353 2354 ifh.write(data[0])
2354 2355 ifh.write(data[1])
2355 2356 if sidedata:
2356 2357 ifh.write(sidedata)
2357 2358 self._enforceinlinesize(transaction, ifh)
2358 2359 nodemaputil.setup_persistent_nodemap(transaction, self)
2359 2360
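
The explicit seeks at the top of ``_writeentry`` above guard against platforms where a handle opened in 'a+' mode does not reliably append after it has been read from. A minimal illustration of the pattern, using a hypothetical file name:

    import os

    with open('example.i', 'ab+') as fh:
        fh.seek(0)
        fh.read()                # a read may leave the position mid-file
        fh.seek(0, os.SEEK_END)  # defensive: make sure the write lands at EOF
        fh.write(b'new index entry')
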
2360 2361 def addgroup(
2361 2362 self,
2362 2363 deltas,
2363 2364 linkmapper,
2364 2365 transaction,
2365 2366 alwayscache=False,
2366 2367 addrevisioncb=None,
2367 2368 duplicaterevisioncb=None,
2368 2369 ):
2369 2370 """
2370 2371 add a delta group
2371 2372
2372 2373 given a set of deltas, add them to the revision log. the
2373 2374 first delta is against its parent, which should be in our
2374 2375 log, the rest are against the previous delta.
2375 2376
2376 2377 If ``addrevisioncb`` is defined, it will be called with arguments of
2377 2378 this revlog and the node that was added.
2378 2379 """
2379 2380
2380 2381 if self._writinghandles:
2381 2382 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2382 2383
2383 2384 r = len(self)
2384 2385 end = 0
2385 2386 if r:
2386 2387 end = self.end(r - 1)
2387 2388 ifh = self._indexfp(b"a+")
2388 2389 isize = r * self.index.entry_size
2389 2390 if self._inline:
2390 2391 transaction.add(self.indexfile, end + isize)
2391 2392 dfh = None
2392 2393 else:
2393 2394 transaction.add(self.indexfile, isize)
2394 2395 transaction.add(self.datafile, end)
2395 2396 dfh = self._datafp(b"a+")
2396 2397
2397 2398 def flush():
2398 2399 if dfh:
2399 2400 dfh.flush()
2400 2401 ifh.flush()
2401 2402
2402 2403 self._writinghandles = (ifh, dfh)
2403 2404 empty = True
2404 2405
2405 2406 try:
2406 2407 deltacomputer = deltautil.deltacomputer(self)
2407 2408 # loop through our set of deltas
2408 2409 for data in deltas:
2409 2410 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2410 2411 link = linkmapper(linknode)
2411 2412 flags = flags or REVIDX_DEFAULT_FLAGS
2412 2413
2413 2414 rev = self.index.get_rev(node)
2414 2415 if rev is not None:
2415 2416 # this can happen if two branches make the same change
2416 2417 self._nodeduplicatecallback(transaction, rev)
2417 2418 if duplicaterevisioncb:
2418 2419 duplicaterevisioncb(self, rev)
2419 2420 empty = False
2420 2421 continue
2421 2422
2422 2423 for p in (p1, p2):
2423 2424 if not self.index.has_node(p):
2424 2425 raise error.LookupError(
2425 2426 p, self.indexfile, _(b'unknown parent')
2426 2427 )
2427 2428
2428 2429 if not self.index.has_node(deltabase):
2429 2430 raise error.LookupError(
2430 2431 deltabase, self.indexfile, _(b'unknown delta base')
2431 2432 )
2432 2433
2433 2434 baserev = self.rev(deltabase)
2434 2435
2435 2436 if baserev != nullrev and self.iscensored(baserev):
2436 2437 # if base is censored, delta must be full replacement in a
2437 2438 # single patch operation
2438 2439 hlen = struct.calcsize(b">lll")
2439 2440 oldlen = self.rawsize(baserev)
2440 2441 newlen = len(delta) - hlen
2441 2442 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2442 2443 raise error.CensoredBaseError(
2443 2444 self.indexfile, self.node(baserev)
2444 2445 )
2445 2446
2446 2447 if not flags and self._peek_iscensored(baserev, delta, flush):
2447 2448 flags |= REVIDX_ISCENSORED
2448 2449
2449 2450 # We assume consumers of addrevisioncb will want to retrieve
2450 2451 # the added revision, which will require a call to
2451 2452 # revision(). revision() will fast path if there is a cache
2452 2453 # hit. So, we tell _addrevision() to always cache in this case.
2453 2454 # We're only using addgroup() in the context of changegroup
2454 2455 # generation so the revision data can always be handled as raw
2455 2456 # by the flagprocessor.
2456 2457 rev = self._addrevision(
2457 2458 node,
2458 2459 None,
2459 2460 transaction,
2460 2461 link,
2461 2462 p1,
2462 2463 p2,
2463 2464 flags,
2464 2465 (baserev, delta),
2465 2466 ifh,
2466 2467 dfh,
2467 2468 alwayscache=alwayscache,
2468 2469 deltacomputer=deltacomputer,
2469 2470 sidedata=sidedata,
2470 2471 )
2471 2472
2472 2473 if addrevisioncb:
2473 2474 addrevisioncb(self, rev)
2474 2475 empty = False
2475 2476
2476 2477 if not dfh and not self._inline:
2477 2478 # addrevision switched from inline to conventional
2478 2479 # reopen the index
2479 2480 ifh.close()
2480 2481 dfh = self._datafp(b"a+")
2481 2482 ifh = self._indexfp(b"a+")
2482 2483 self._writinghandles = (ifh, dfh)
2483 2484 finally:
2484 2485 self._writinghandles = None
2485 2486
2486 2487 if dfh:
2487 2488 dfh.close()
2488 2489 ifh.close()
2489 2490 return not empty
2490 2491
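
For reference, each item of the ``deltas`` iterable consumed by ``addgroup`` above unpacks into an 8-tuple, as the loop shows. A sketch of one such entry, with placeholder values (not real nodes):

    nullid = b'\0' * 20          # placeholder parent node
    delta_entry = (
        b'\x01' * 20,            # node of the revision being added
        nullid,                  # p1
        nullid,                  # p2
        b'\x02' * 20,            # linknode, mapped to a linkrev by ``linkmapper``
        nullid,                  # deltabase: node the delta applies against
        b'',                     # delta: binary patch payload
        0,                       # flags (falsy means REVIDX_DEFAULT_FLAGS)
        {},                      # sidedata mapping (may be empty)
    )
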
2491 2492 def iscensored(self, rev):
2492 2493 """Check if a file revision is censored."""
2493 2494 if not self._censorable:
2494 2495 return False
2495 2496
2496 2497 return self.flags(rev) & REVIDX_ISCENSORED
2497 2498
2498 2499 def _peek_iscensored(self, baserev, delta, flush):
2499 2500 """Quickly check if a delta produces a censored revision."""
2500 2501 if not self._censorable:
2501 2502 return False
2502 2503
2503 2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2504 2505
2505 2506 def getstrippoint(self, minlink):
2506 2507 """find the minimum rev that must be stripped to strip the linkrev
2507 2508
2508 2509 Returns a tuple containing the minimum rev and a set of all revs that
2509 2510 have linkrevs that will be broken by this strip.
2510 2511 """
2511 2512 return storageutil.resolvestripinfo(
2512 2513 minlink,
2513 2514 len(self) - 1,
2514 2515 self.headrevs(),
2515 2516 self.linkrev,
2516 2517 self.parentrevs,
2517 2518 )
2518 2519
2519 2520 def strip(self, minlink, transaction):
2520 2521 """truncate the revlog on the first revision with a linkrev >= minlink
2521 2522
2522 2523 This function is called when we're stripping revision minlink and
2523 2524 its descendants from the repository.
2524 2525
2525 2526 We have to remove all revisions with linkrev >= minlink, because
2526 2527 the equivalent changelog revisions will be renumbered after the
2527 2528 strip.
2528 2529
2529 2530 So we truncate the revlog on the first of these revisions, and
2530 2531 trust that the caller has saved the revisions that shouldn't be
2531 2532 removed and that it'll re-add them after this truncation.
2532 2533 """
2533 2534 if len(self) == 0:
2534 2535 return
2535 2536
2536 2537 rev, _ = self.getstrippoint(minlink)
2537 2538 if rev == len(self):
2538 2539 return
2539 2540
2540 2541 # first truncate the files on disk
2541 2542 end = self.start(rev)
2542 2543 if not self._inline:
2543 2544 transaction.add(self.datafile, end)
2544 2545 end = rev * self.index.entry_size
2545 2546 else:
2546 2547 end += rev * self.index.entry_size
2547 2548
2548 2549 transaction.add(self.indexfile, end)
2549 2550
2550 2551 # then reset internal state in memory to forget those revisions
2551 2552 self._revisioncache = None
2552 2553 self._chaininfocache = util.lrucachedict(500)
2553 2554 self._chunkclear()
2554 2555
2555 2556 del self.index[rev:-1]
2556 2557
2557 2558 def checksize(self):
2558 2559 """Check size of index and data files
2559 2560
2560 2561 return a (dd, di) tuple.
2561 2562 - dd: extra bytes for the "data" file
2562 2563 - di: extra bytes for the "index" file
2563 2564
2564 2565 A healthy revlog will return (0, 0).
2565 2566 """
2566 2567 expected = 0
2567 2568 if len(self):
2568 2569 expected = max(0, self.end(len(self) - 1))
2569 2570
2570 2571 try:
2571 2572 with self._datafp() as f:
2572 2573 f.seek(0, io.SEEK_END)
2573 2574 actual = f.tell()
2574 2575 dd = actual - expected
2575 2576 except IOError as inst:
2576 2577 if inst.errno != errno.ENOENT:
2577 2578 raise
2578 2579 dd = 0
2579 2580
2580 2581 try:
2581 2582 f = self.opener(self.indexfile)
2582 2583 f.seek(0, io.SEEK_END)
2583 2584 actual = f.tell()
2584 2585 f.close()
2585 2586 s = self.index.entry_size
2586 2587 i = max(0, actual // s)
2587 2588 di = actual - (i * s)
2588 2589 if self._inline:
2589 2590 databytes = 0
2590 2591 for r in self:
2591 2592 databytes += max(0, self.length(r))
2592 2593 dd = 0
2593 2594 di = actual - len(self) * s - databytes
2594 2595 except IOError as inst:
2595 2596 if inst.errno != errno.ENOENT:
2596 2597 raise
2597 2598 di = 0
2598 2599
2599 2600 return (dd, di)
2600 2601
2601 2602 def files(self):
2602 2603 res = [self.indexfile]
2603 2604 if not self._inline:
2604 2605 res.append(self.datafile)
2605 2606 return res
2606 2607
2607 2608 def emitrevisions(
2608 2609 self,
2609 2610 nodes,
2610 2611 nodesorder=None,
2611 2612 revisiondata=False,
2612 2613 assumehaveparentrevisions=False,
2613 2614 deltamode=repository.CG_DELTAMODE_STD,
2614 2615 sidedata_helpers=None,
2615 2616 ):
2616 2617 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2617 2618 raise error.ProgrammingError(
2618 2619 b'unhandled value for nodesorder: %s' % nodesorder
2619 2620 )
2620 2621
2621 2622 if nodesorder is None and not self._generaldelta:
2622 2623 nodesorder = b'storage'
2623 2624
2624 2625 if (
2625 2626 not self._storedeltachains
2626 2627 and deltamode != repository.CG_DELTAMODE_PREV
2627 2628 ):
2628 2629 deltamode = repository.CG_DELTAMODE_FULL
2629 2630
2630 2631 return storageutil.emitrevisions(
2631 2632 self,
2632 2633 nodes,
2633 2634 nodesorder,
2634 2635 revlogrevisiondelta,
2635 2636 deltaparentfn=self.deltaparent,
2636 2637 candeltafn=self.candelta,
2637 2638 rawsizefn=self.rawsize,
2638 2639 revdifffn=self.revdiff,
2639 2640 flagsfn=self.flags,
2640 2641 deltamode=deltamode,
2641 2642 revisiondata=revisiondata,
2642 2643 assumehaveparentrevisions=assumehaveparentrevisions,
2643 2644 sidedata_helpers=sidedata_helpers,
2644 2645 )
2645 2646
2646 2647 DELTAREUSEALWAYS = b'always'
2647 2648 DELTAREUSESAMEREVS = b'samerevs'
2648 2649 DELTAREUSENEVER = b'never'
2649 2650
2650 2651 DELTAREUSEFULLADD = b'fulladd'
2651 2652
2652 2653 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2653 2654
2654 2655 def clone(
2655 2656 self,
2656 2657 tr,
2657 2658 destrevlog,
2658 2659 addrevisioncb=None,
2659 2660 deltareuse=DELTAREUSESAMEREVS,
2660 2661 forcedeltabothparents=None,
2661 sidedatacompanion=None,
2662 sidedata_helpers=None,
2662 2663 ):
2663 2664 """Copy this revlog to another, possibly with format changes.
2664 2665
2665 2666 The destination revlog will contain the same revisions and nodes.
2666 2667 However, it may not be bit-for-bit identical due to e.g. delta encoding
2667 2668 differences.
2668 2669
2670 2671 The ``deltareuse`` argument controls how deltas from the existing revlog
2670 2671 are preserved in the destination revlog. The argument can have the
2671 2672 following values:
2672 2673
2673 2674 DELTAREUSEALWAYS
2674 2675 Deltas will always be reused (if possible), even if the destination
2675 2676 revlog would not select the same revisions for the delta. This is the
2676 2677 fastest mode of operation.
2677 2678 DELTAREUSESAMEREVS
2678 2679 Deltas will be reused if the destination revlog would pick the same
2679 2680 revisions for the delta. This mode strikes a balance between speed
2680 2681 and optimization.
2681 2682 DELTAREUSENEVER
2682 2683 Deltas will never be reused. This is the slowest mode of execution.
2683 2684 This mode can be used to recompute deltas (e.g. if the diff/delta
2684 2685 algorithm changes).
2685 2686 DELTAREUSEFULLADD
2686 2687 Revisions will be re-added as if they were new content. This is
2687 2688 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2688 2689 e.g. large file detection and handling.
2689 2690
2690 2691 Delta computation can be slow, so the choice of delta reuse policy can
2691 2692 significantly affect run time.
2692 2693
2693 2694 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2694 2695 two extremes. Deltas will be reused when they are appropriate, but if a
2695 2696 better delta could be chosen, it will be. This means if you
2696 2697 are converting a non-generaldelta revlog to a generaldelta revlog,
2697 2698 deltas will be recomputed if the delta's parent isn't a parent of the
2698 2699 revision.
2699 2700
2700 2701 In addition to the delta policy, the ``forcedeltabothparents``
2701 2702 argument controls whether to force computing deltas against both parents
2702 2703 for merges. If None, the destination revlog's current setting is used.
2703 2704
2704 If not None, the `sidedatacompanion` is callable that accept two
2705 arguments:
2706
2707 (srcrevlog, rev)
2708
2709 and return a quintet that control changes to sidedata content from the
2710 old revision to the new clone result:
2711
2712 (dropall, filterout, update, new_flags, dropped_flags)
2713
2714 * if `dropall` is True, all sidedata should be dropped
2715 * `filterout` is a set of sidedata keys that should be dropped
2716 * `update` is a mapping of additional/new key -> value
2717 * new_flags is a bitfield of new flags that the revision should get
2718 * dropped_flags is a bitfield of flags that the revision should no longer have
2705 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
2719 2706 """
2720 2707 if deltareuse not in self.DELTAREUSEALL:
2721 2708 raise ValueError(
2722 2709 _(b'value for deltareuse invalid: %s') % deltareuse
2723 2710 )
2724 2711
2725 2712 if len(destrevlog):
2726 2713 raise ValueError(_(b'destination revlog is not empty'))
2727 2714
2728 2715 if getattr(self, 'filteredrevs', None):
2729 2716 raise ValueError(_(b'source revlog has filtered revisions'))
2730 2717 if getattr(destrevlog, 'filteredrevs', None):
2731 2718 raise ValueError(_(b'destination revlog has filtered revisions'))
2732 2719
2733 2720 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2734 2721 # if possible.
2735 2722 oldlazydelta = destrevlog._lazydelta
2736 2723 oldlazydeltabase = destrevlog._lazydeltabase
2737 2724 oldamd = destrevlog._deltabothparents
2738 2725
2739 2726 try:
2740 2727 if deltareuse == self.DELTAREUSEALWAYS:
2741 2728 destrevlog._lazydeltabase = True
2742 2729 destrevlog._lazydelta = True
2743 2730 elif deltareuse == self.DELTAREUSESAMEREVS:
2744 2731 destrevlog._lazydeltabase = False
2745 2732 destrevlog._lazydelta = True
2746 2733 elif deltareuse == self.DELTAREUSENEVER:
2747 2734 destrevlog._lazydeltabase = False
2748 2735 destrevlog._lazydelta = False
2749 2736
2750 2737 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2751 2738
2752 2739 self._clone(
2753 2740 tr,
2754 2741 destrevlog,
2755 2742 addrevisioncb,
2756 2743 deltareuse,
2757 2744 forcedeltabothparents,
2758 sidedatacompanion,
2745 sidedata_helpers,
2759 2746 )
2760 2747
2761 2748 finally:
2762 2749 destrevlog._lazydelta = oldlazydelta
2763 2750 destrevlog._lazydeltabase = oldlazydeltabase
2764 2751 destrevlog._deltabothparents = oldamd
2765 2752
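
A hedged usage sketch of ``clone`` with an explicit delta-reuse policy; ``src``, ``dest`` and ``tr`` are assumed to be an open source revlog, an empty destination revlog and an active transaction:

    src.clone(
        tr,
        dest,
        deltareuse=src.DELTAREUSESAMEREVS,   # the default policy
        forcedeltabothparents=None,          # keep the destination's setting
        sidedata_helpers=None,               # no sidedata rewriting
    )
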
2766 2753 def _clone(
2767 2754 self,
2768 2755 tr,
2769 2756 destrevlog,
2770 2757 addrevisioncb,
2771 2758 deltareuse,
2772 2759 forcedeltabothparents,
2773 sidedatacompanion,
2760 sidedata_helpers,
2774 2761 ):
2775 2762 """perform the core duty of `revlog.clone` after parameter processing"""
2776 2763 deltacomputer = deltautil.deltacomputer(destrevlog)
2777 2764 index = self.index
2778 2765 for rev in self:
2779 2766 entry = index[rev]
2780 2767
2781 2768 # Some classes override linkrev to take filtered revs into
2782 2769 # account. Use raw entry from index.
2783 2770 flags = entry[0] & 0xFFFF
2784 2771 linkrev = entry[4]
2785 2772 p1 = index[entry[5]][7]
2786 2773 p2 = index[entry[6]][7]
2787 2774 node = entry[7]
2788 2775
2789 sidedataactions = (False, [], {}, 0, 0)
2790 if sidedatacompanion is not None:
2791 sidedataactions = sidedatacompanion(self, rev)
2792
2793 2776 # (Possibly) reuse the delta from the revlog if allowed and
2794 2777 # the revlog chunk is a delta.
2795 2778 cachedelta = None
2796 2779 rawtext = None
2797 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2798 dropall = sidedataactions[0]
2799 filterout = sidedataactions[1]
2800 update = sidedataactions[2]
2801 new_flags = sidedataactions[3]
2802 dropped_flags = sidedataactions[4]
2780 if deltareuse == self.DELTAREUSEFULLADD:
2803 2781 text, sidedata = self._revisiondata(rev)
2804 if dropall:
2805 sidedata = {}
2806 for key in filterout:
2807 sidedata.pop(key, None)
2808 sidedata.update(update)
2809 if not sidedata:
2810 sidedata = None
2811
2812 flags |= new_flags
2813 flags &= ~dropped_flags
2782
2783 if sidedata_helpers is not None:
2784 (sidedata, new_flags) = storageutil.run_sidedata_helpers(
2785 self, sidedata_helpers, sidedata, rev
2786 )
2787 flags = flags | new_flags[0] & ~new_flags[1]
2814 2788
2815 2789 destrevlog.addrevision(
2816 2790 text,
2817 2791 tr,
2818 2792 linkrev,
2819 2793 p1,
2820 2794 p2,
2821 2795 cachedelta=cachedelta,
2822 2796 node=node,
2823 2797 flags=flags,
2824 2798 deltacomputer=deltacomputer,
2825 2799 sidedata=sidedata,
2826 2800 )
2827 2801 else:
2828 2802 if destrevlog._lazydelta:
2829 2803 dp = self.deltaparent(rev)
2830 2804 if dp != nullrev:
2831 2805 cachedelta = (dp, bytes(self._chunk(rev)))
2832 2806
2807 sidedata = None
2833 2808 if not cachedelta:
2834 rawtext = self.rawdata(rev)
2809 rawtext, sidedata = self._revisiondata(rev)
2810 if sidedata is None:
2811 sidedata = self.sidedata(rev)
2812
2813 if sidedata_helpers is not None:
2814 (sidedata, new_flags) = storageutil.run_sidedata_helpers(
2815 self, sidedata_helpers, sidedata, rev
2816 )
2817 flags = flags | new_flags[0] & ~new_flags[1]
2835 2818
2836 2819 ifh = destrevlog.opener(
2837 2820 destrevlog.indexfile, b'a+', checkambig=False
2838 2821 )
2839 2822 dfh = None
2840 2823 if not destrevlog._inline:
2841 2824 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2842 2825 try:
2843 2826 destrevlog._addrevision(
2844 2827 node,
2845 2828 rawtext,
2846 2829 tr,
2847 2830 linkrev,
2848 2831 p1,
2849 2832 p2,
2850 2833 flags,
2851 2834 cachedelta,
2852 2835 ifh,
2853 2836 dfh,
2854 2837 deltacomputer=deltacomputer,
2838 sidedata=sidedata,
2855 2839 )
2856 2840 finally:
2857 2841 if dfh:
2858 2842 dfh.close()
2859 2843 ifh.close()
2860 2844
2861 2845 if addrevisioncb:
2862 2846 addrevisioncb(self, rev, node)
2863 2847
2864 2848 def censorrevision(self, tr, censornode, tombstone=b''):
2865 2849 if (self.version & 0xFFFF) == REVLOGV0:
2866 2850 raise error.RevlogError(
2867 2851 _(b'cannot censor with version %d revlogs') % self.version
2868 2852 )
2869 2853
2870 2854 censorrev = self.rev(censornode)
2871 2855 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2872 2856
2873 2857 if len(tombstone) > self.rawsize(censorrev):
2874 2858 raise error.Abort(
2875 2859 _(b'censor tombstone must be no longer than censored data')
2876 2860 )
2877 2861
2878 2862 # Rewriting the revlog in place is hard. Our strategy for censoring is
2879 2863 # to create a new revlog, copy all revisions to it, then replace the
2880 2864 # revlogs on transaction close.
2881 2865
2882 2866 newindexfile = self.indexfile + b'.tmpcensored'
2883 2867 newdatafile = self.datafile + b'.tmpcensored'
2884 2868
2885 2869 # This is a bit dangerous. We could easily have a mismatch of state.
2886 2870 newrl = revlog(
2887 2871 self.opener,
2888 2872 target=self.target,
2889 2873 indexfile=newindexfile,
2890 2874 datafile=newdatafile,
2891 2875 censorable=True,
2892 2876 )
2893 2877 newrl.version = self.version
2894 2878 newrl._generaldelta = self._generaldelta
2895 2879 newrl._parse_index = self._parse_index
2896 2880
2897 2881 for rev in self.revs():
2898 2882 node = self.node(rev)
2899 2883 p1, p2 = self.parents(node)
2900 2884
2901 2885 if rev == censorrev:
2902 2886 newrl.addrawrevision(
2903 2887 tombstone,
2904 2888 tr,
2905 2889 self.linkrev(censorrev),
2906 2890 p1,
2907 2891 p2,
2908 2892 censornode,
2909 2893 REVIDX_ISCENSORED,
2910 2894 )
2911 2895
2912 2896 if newrl.deltaparent(rev) != nullrev:
2913 2897 raise error.Abort(
2914 2898 _(
2915 2899 b'censored revision stored as delta; '
2916 2900 b'cannot censor'
2917 2901 ),
2918 2902 hint=_(
2919 2903 b'censoring of revlogs is not '
2920 2904 b'fully implemented; please report '
2921 2905 b'this bug'
2922 2906 ),
2923 2907 )
2924 2908 continue
2925 2909
2926 2910 if self.iscensored(rev):
2927 2911 if self.deltaparent(rev) != nullrev:
2928 2912 raise error.Abort(
2929 2913 _(
2930 2914 b'cannot censor due to censored '
2931 2915 b'revision having delta stored'
2932 2916 )
2933 2917 )
2934 2918 rawtext = self._chunk(rev)
2935 2919 else:
2936 2920 rawtext = self.rawdata(rev)
2937 2921
2938 2922 newrl.addrawrevision(
2939 2923 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2940 2924 )
2941 2925
2942 2926 tr.addbackup(self.indexfile, location=b'store')
2943 2927 if not self._inline:
2944 2928 tr.addbackup(self.datafile, location=b'store')
2945 2929
2946 2930 self.opener.rename(newrl.indexfile, self.indexfile)
2947 2931 if not self._inline:
2948 2932 self.opener.rename(newrl.datafile, self.datafile)
2949 2933
2950 2934 self.clearcaches()
2951 2935 self._loadindex()
2952 2936
2953 2937 def verifyintegrity(self, state):
2954 2938 """Verifies the integrity of the revlog.
2955 2939
2956 2940 Yields ``revlogproblem`` instances describing problems that are
2957 2941 found.
2958 2942 """
2959 2943 dd, di = self.checksize()
2960 2944 if dd:
2961 2945 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2962 2946 if di:
2963 2947 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2964 2948
2965 2949 version = self.version & 0xFFFF
2966 2950
2967 2951 # The verifier tells us what version revlog we should be.
2968 2952 if version != state[b'expectedversion']:
2969 2953 yield revlogproblem(
2970 2954 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2971 2955 % (self.indexfile, version, state[b'expectedversion'])
2972 2956 )
2973 2957
2974 2958 state[b'skipread'] = set()
2975 2959 state[b'safe_renamed'] = set()
2976 2960
2977 2961 for rev in self:
2978 2962 node = self.node(rev)
2979 2963
2980 2964 # Verify contents. 4 cases to care about:
2981 2965 #
2982 2966 # common: the most common case
2983 2967 # rename: with a rename
2984 2968 # meta: file content starts with b'\1\n', the metadata
2985 2969 # header defined in filelog.py, but without a rename
2986 2970 # ext: content stored externally
2987 2971 #
2988 2972 # More formally, their differences are shown below:
2989 2973 #
2990 2974 # | common | rename | meta | ext
2991 2975 # -------------------------------------------------------
2992 2976 # flags() | 0 | 0 | 0 | not 0
2993 2977 # renamed() | False | True | False | ?
2994 2978 # rawtext[0:2]=='\1\n'| False | True | True | ?
2995 2979 #
2996 2980 # "rawtext" means the raw text stored in revlog data, which
2997 2981 # could be retrieved by "rawdata(rev)". "text"
2998 2982 # mentioned below is "revision(rev)".
2999 2983 #
3000 2984 # There are 3 different lengths stored physically:
3001 2985 # 1. L1: rawsize, stored in revlog index
3002 2986 # 2. L2: len(rawtext), stored in revlog data
3003 2987 # 3. L3: len(text), stored in revlog data if flags==0, or
3004 2988 # possibly somewhere else if flags!=0
3005 2989 #
3006 2990 # L1 should be equal to L2. L3 could be different from them.
3007 2991 # "text" may or may not affect commit hash depending on flag
3008 2992 # processors (see flagutil.addflagprocessor).
3009 2993 #
3010 2994 # | common | rename | meta | ext
3011 2995 # -------------------------------------------------
3012 2996 # rawsize() | L1 | L1 | L1 | L1
3013 2997 # size() | L1 | L2-LM | L1(*) | L1 (?)
3014 2998 # len(rawtext) | L2 | L2 | L2 | L2
3015 2999 # len(text) | L2 | L2 | L2 | L3
3016 3000 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3017 3001 #
3018 3002 # LM: length of metadata, depending on rawtext
3019 3003 # (*): not ideal, see comment in filelog.size
3020 3004 # (?): could be "- len(meta)" if the resolved content has
3021 3005 # rename metadata
3022 3006 #
3023 3007 # Checks needed to be done:
3024 3008 # 1. length check: L1 == L2, in all cases.
3025 3009 # 2. hash check: depending on flag processor, we may need to
3026 3010 # use either "text" (external), or "rawtext" (in revlog).
3027 3011
3028 3012 try:
3029 3013 skipflags = state.get(b'skipflags', 0)
3030 3014 if skipflags:
3031 3015 skipflags &= self.flags(rev)
3032 3016
3033 3017 _verify_revision(self, skipflags, state, node)
3034 3018
3035 3019 l1 = self.rawsize(rev)
3036 3020 l2 = len(self.rawdata(node))
3037 3021
3038 3022 if l1 != l2:
3039 3023 yield revlogproblem(
3040 3024 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3041 3025 node=node,
3042 3026 )
3043 3027
3044 3028 except error.CensoredNodeError:
3045 3029 if state[b'erroroncensored']:
3046 3030 yield revlogproblem(
3047 3031 error=_(b'censored file data'), node=node
3048 3032 )
3049 3033 state[b'skipread'].add(node)
3050 3034 except Exception as e:
3051 3035 yield revlogproblem(
3052 3036 error=_(b'unpacking %s: %s')
3053 3037 % (short(node), stringutil.forcebytestr(e)),
3054 3038 node=node,
3055 3039 )
3056 3040 state[b'skipread'].add(node)
3057 3041
3058 3042 def storageinfo(
3059 3043 self,
3060 3044 exclusivefiles=False,
3061 3045 sharedfiles=False,
3062 3046 revisionscount=False,
3063 3047 trackedsize=False,
3064 3048 storedsize=False,
3065 3049 ):
3066 3050 d = {}
3067 3051
3068 3052 if exclusivefiles:
3069 3053 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3070 3054 if not self._inline:
3071 3055 d[b'exclusivefiles'].append((self.opener, self.datafile))
3072 3056
3073 3057 if sharedfiles:
3074 3058 d[b'sharedfiles'] = []
3075 3059
3076 3060 if revisionscount:
3077 3061 d[b'revisionscount'] = len(self)
3078 3062
3079 3063 if trackedsize:
3080 3064 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3081 3065
3082 3066 if storedsize:
3083 3067 d[b'storedsize'] = sum(
3084 3068 self.opener.stat(path).st_size for path in self.files()
3085 3069 )
3086 3070
3087 3071 return d
3088 3072
3089 3073 def rewrite_sidedata(self, helpers, startrev, endrev):
3090 3074 if self.version & 0xFFFF != REVLOGV2:
3091 3075 return
3092 3076 # inline revlogs are not yet supported because they suffer from an issue
3093 3077 # when rewriting them (since it's not an append-only operation).
3094 3078 # See issue6485.
3095 3079 assert not self._inline
3096 3080 if not helpers[1] and not helpers[2]:
3097 3081 # Nothing to generate or remove
3098 3082 return
3099 3083
3100 3084 new_entries = []
3101 3085 # append the new sidedata
3102 3086 with self._datafp(b'a+') as fp:
3103 3087 # Maybe this bug still exists, see revlog._writeentry
3104 3088 fp.seek(0, os.SEEK_END)
3105 3089 current_offset = fp.tell()
3106 3090 for rev in range(startrev, endrev + 1):
3107 3091 entry = self.index[rev]
3108 3092 new_sidedata, flags = storageutil.run_sidedata_helpers(
3109 3093 store=self,
3110 3094 sidedata_helpers=helpers,
3111 3095 sidedata={},
3112 3096 rev=rev,
3113 3097 )
3114 3098
3115 3099 serialized_sidedata = sidedatautil.serialize_sidedata(
3116 3100 new_sidedata
3117 3101 )
3118 3102 if entry[8] != 0 or entry[9] != 0:
3119 3103 # rewriting entries that already have sidedata is not
3120 3104 # supported yet, because it introduces garbage data in the
3121 3105 # revlog.
3122 3106 msg = b"Rewriting existing sidedata is not supported yet"
3123 3107 raise error.Abort(msg)
3124 3108
3125 3109 # Apply (potential) flags to add and to remove after running
3126 3110 # the sidedata helpers
3127 3111 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3128 3112 entry = (new_offset_flags,) + entry[1:8]
3129 3113 entry += (current_offset, len(serialized_sidedata))
3130 3114
3131 3115 fp.write(serialized_sidedata)
3132 3116 new_entries.append(entry)
3133 3117 current_offset += len(serialized_sidedata)
3134 3118
3135 3119 # rewrite the new index entries
3136 3120 with self._indexfp(b'w+') as fp:
3137 3121 fp.seek(startrev * self.index.entry_size)
3138 3122 for i, e in enumerate(new_entries):
3139 3123 rev = startrev + i
3140 3124 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3141 3125 packed = self.index.entry_binary(rev)
3142 3126 if rev == 0:
3143 3127 header = self.index.pack_header(self.version)
3144 3128 packed = header + packed
3145 3129 fp.write(packed)
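
Both the clone path and ``rewrite_sidedata`` above fold the result of ``storageutil.run_sidedata_helpers`` into the entry flags with the expression ``flags | new_flags[0] & ~new_flags[1]``. Since ``&`` binds tighter than ``|`` in Python, the removal mask is applied to the bits being added before they are OR-ed in. A small sketch of that update (names are illustrative):

    def apply_sidedata_flags(flags, new_flags):
        # new_flags is (flags_to_add, flags_to_remove), as consumed above
        to_add, to_remove = new_flags
        return flags | to_add & ~to_remove
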
@@ -1,593 +1,594 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11
12 12 from ..i18n import _
13 13 from ..pycompat import getattr
14 14 from .. import (
15 changegroup,
15 16 changelog,
16 17 error,
17 18 filelog,
18 19 manifest,
19 20 metadata,
20 21 pycompat,
21 22 requirements,
22 revlog,
23 23 scmutil,
24 24 store,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 from ..revlogutils import nodemap
28 from ..revlogutils import (
29 constants as revlogconst,
30 flagutil,
31 nodemap,
32 sidedata as sidedatamod,
33 )
34
35
36 def get_sidedata_helpers(srcrepo, dstrepo):
37 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
38 sequential = pycompat.iswindows or not use_w
39 if not sequential:
40 srcrepo.register_sidedata_computer(
41 revlogconst.KIND_CHANGELOG,
42 sidedatamod.SD_FILES,
43 (sidedatamod.SD_FILES,),
44 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
45 flagutil.REVIDX_HASCOPIESINFO,
46 replace=True,
47 )
48 return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
29 49
30 50
31 51 def _revlogfrompath(repo, rl_type, path):
32 52 """Obtain a revlog from a repo path.
33 53
34 54 An instance of the appropriate class is returned.
35 55 """
36 56 if rl_type & store.FILEFLAGS_CHANGELOG:
37 57 return changelog.changelog(repo.svfs)
38 58 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
39 59 mandir = b''
40 60 if b'/' in path:
41 61 mandir = path.rsplit(b'/', 1)[0]
42 62 return manifest.manifestrevlog(
43 63 repo.nodeconstants, repo.svfs, tree=mandir
44 64 )
45 65 else:
46 66 # drop the extension and the `data/` prefix
47 67 path = path.rsplit(b'.', 1)[0].split(b'/', 1)[1]
48 68 return filelog.filelog(repo.svfs, path)
49 69
50 70
51 71 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
52 72 """copy all relevant files for `oldrl` into `destrepo` store
53 73
54 74 Files are copied "as is" without any transformation. The copy is performed
55 75 without extra checks. Callers are responsible for making sure the copied
56 76 content is compatible with format of the destination repository.
57 77 """
58 78 oldrl = getattr(oldrl, '_revlog', oldrl)
59 79 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
60 80 newrl = getattr(newrl, '_revlog', newrl)
61 81
62 82 oldvfs = oldrl.opener
63 83 newvfs = newrl.opener
64 84 oldindex = oldvfs.join(oldrl.indexfile)
65 85 newindex = newvfs.join(newrl.indexfile)
66 86 olddata = oldvfs.join(oldrl.datafile)
67 87 newdata = newvfs.join(newrl.datafile)
68 88
69 89 with newvfs(newrl.indexfile, b'w'):
70 90 pass # create all the directories
71 91
72 92 util.copyfile(oldindex, newindex)
73 93 copydata = oldrl.opener.exists(oldrl.datafile)
74 94 if copydata:
75 95 util.copyfile(olddata, newdata)
76 96
77 97 if rl_type & store.FILEFLAGS_FILELOG:
78 98 destrepo.svfs.fncache.add(unencodedname)
79 99 if copydata:
80 100 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
81 101
82 102
83 103 UPGRADE_CHANGELOG = b"changelog"
84 104 UPGRADE_MANIFEST = b"manifest"
85 105 UPGRADE_FILELOGS = b"all-filelogs"
86 106
87 107 UPGRADE_ALL_REVLOGS = frozenset(
88 108 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
89 109 )
90 110
91 111
92 def getsidedatacompanion(srcrepo, dstrepo):
93 sidedatacompanion = None
94 removedreqs = srcrepo.requirements - dstrepo.requirements
95 addedreqs = dstrepo.requirements - srcrepo.requirements
96 if requirements.SIDEDATA_REQUIREMENT in removedreqs:
97
98 def sidedatacompanion(rl, rev):
99 rl = getattr(rl, '_revlog', rl)
100 if rl.flags(rev) & revlog.REVIDX_SIDEDATA:
101 return True, (), {}, 0, 0
102 return False, (), {}, 0, 0
103
104 elif requirements.COPIESSDC_REQUIREMENT in addedreqs:
105 sidedatacompanion = metadata.getsidedataadder(srcrepo, dstrepo)
106 elif requirements.COPIESSDC_REQUIREMENT in removedreqs:
107 sidedatacompanion = metadata.getsidedataremover(srcrepo, dstrepo)
108 return sidedatacompanion
109
110
111 112 def matchrevlog(revlogfilter, rl_type):
112 113 """check if a revlog is selected for cloning.
113 114
114 115 In other words, whether any updates need to be done on the revlog,
115 116 or whether it can be blindly copied.
116 117
117 118 The store entry is checked against the passed filter"""
118 119 if rl_type & store.FILEFLAGS_CHANGELOG:
119 120 return UPGRADE_CHANGELOG in revlogfilter
120 121 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
121 122 return UPGRADE_MANIFEST in revlogfilter
122 123 assert rl_type & store.FILEFLAGS_FILELOG
123 124 return UPGRADE_FILELOGS in revlogfilter
124 125
125 126
126 127 def _perform_clone(
127 128 ui,
128 129 dstrepo,
129 130 tr,
130 131 old_revlog,
131 132 rl_type,
132 133 unencoded,
133 134 upgrade_op,
134 sidedatacompanion,
135 sidedata_helpers,
135 136 oncopiedrevision,
136 137 ):
137 138 """returns the newly created revlog object"""
138 139 newrl = None
139 140 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
140 141 ui.note(
141 142 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
142 143 )
143 144 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
144 145 old_revlog.clone(
145 146 tr,
146 147 newrl,
147 148 addrevisioncb=oncopiedrevision,
148 149 deltareuse=upgrade_op.delta_reuse_mode,
149 150 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
150 sidedatacompanion=sidedatacompanion,
151 sidedata_helpers=sidedata_helpers,
151 152 )
152 153 else:
153 154 msg = _(b'blindly copying %s containing %i revisions\n')
154 155 ui.note(msg % (unencoded, len(old_revlog)))
155 156 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
156 157
157 158 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
158 159 return newrl
159 160
160 161
161 162 def _clonerevlogs(
162 163 ui,
163 164 srcrepo,
164 165 dstrepo,
165 166 tr,
166 167 upgrade_op,
167 168 ):
168 169 """Copy revlogs between 2 repos."""
169 170 revcount = 0
170 171 srcsize = 0
171 172 srcrawsize = 0
172 173 dstsize = 0
173 174 fcount = 0
174 175 frevcount = 0
175 176 fsrcsize = 0
176 177 frawsize = 0
177 178 fdstsize = 0
178 179 mcount = 0
179 180 mrevcount = 0
180 181 msrcsize = 0
181 182 mrawsize = 0
182 183 mdstsize = 0
183 184 crevcount = 0
184 185 csrcsize = 0
185 186 crawsize = 0
186 187 cdstsize = 0
187 188
188 189 alldatafiles = list(srcrepo.store.walk())
189 190 # mapping of data files which needs to be cloned
190 191 # key is unencoded filename
191 192 # value is revlog_object_from_srcrepo
192 193 manifests = {}
193 194 changelogs = {}
194 195 filelogs = {}
195 196
196 197 # Perform a pass to collect metadata. This validates we can open all
197 198 # source files and allows a unified progress bar to be displayed.
198 199 for rl_type, unencoded, encoded, size in alldatafiles:
199 200 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
200 201 continue
201 202
202 203 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
203 204
204 205 info = rl.storageinfo(
205 206 exclusivefiles=True,
206 207 revisionscount=True,
207 208 trackedsize=True,
208 209 storedsize=True,
209 210 )
210 211
211 212 revcount += info[b'revisionscount'] or 0
212 213 datasize = info[b'storedsize'] or 0
213 214 rawsize = info[b'trackedsize'] or 0
214 215
215 216 srcsize += datasize
216 217 srcrawsize += rawsize
217 218
218 219 # This is for the separate progress bars.
219 220 if rl_type & store.FILEFLAGS_CHANGELOG:
220 221 changelogs[unencoded] = (rl_type, rl)
221 222 crevcount += len(rl)
222 223 csrcsize += datasize
223 224 crawsize += rawsize
224 225 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
225 226 manifests[unencoded] = (rl_type, rl)
226 227 mcount += 1
227 228 mrevcount += len(rl)
228 229 msrcsize += datasize
229 230 mrawsize += rawsize
230 231 elif rl_type & store.FILEFLAGS_FILELOG:
231 232 filelogs[unencoded] = (rl_type, rl)
232 233 fcount += 1
233 234 frevcount += len(rl)
234 235 fsrcsize += datasize
235 236 frawsize += rawsize
236 237 else:
237 238 raise error.ProgrammingError(b'unknown revlog type')
238 239
239 240 if not revcount:
240 241 return
241 242
242 243 ui.status(
243 244 _(
244 245 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
245 246 b'%d in changelog)\n'
246 247 )
247 248 % (revcount, frevcount, mrevcount, crevcount)
248 249 )
249 250 ui.status(
250 251 _(b'migrating %s in store; %s tracked data\n')
251 252 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
252 253 )
253 254
254 255 # Used to keep track of progress.
255 256 progress = None
256 257
257 258 def oncopiedrevision(rl, rev, node):
258 259 progress.increment()
259 260
260 sidedatacompanion = getsidedatacompanion(srcrepo, dstrepo)
261 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
261 262
262 263 # Migrating filelogs
263 264 ui.status(
264 265 _(
265 266 b'migrating %d filelogs containing %d revisions '
266 267 b'(%s in store; %s tracked data)\n'
267 268 )
268 269 % (
269 270 fcount,
270 271 frevcount,
271 272 util.bytecount(fsrcsize),
272 273 util.bytecount(frawsize),
273 274 )
274 275 )
275 276 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
276 277 for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
277 278 newrl = _perform_clone(
278 279 ui,
279 280 dstrepo,
280 281 tr,
281 282 oldrl,
282 283 rl_type,
283 284 unencoded,
284 285 upgrade_op,
285 sidedatacompanion,
286 sidedata_helpers,
286 287 oncopiedrevision,
287 288 )
288 289 info = newrl.storageinfo(storedsize=True)
289 290 fdstsize += info[b'storedsize'] or 0
290 291 ui.status(
291 292 _(
292 293 b'finished migrating %d filelog revisions across %d '
293 294 b'filelogs; change in size: %s\n'
294 295 )
295 296 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
296 297 )
297 298
298 299 # Migrating manifests
299 300 ui.status(
300 301 _(
301 302 b'migrating %d manifests containing %d revisions '
302 303 b'(%s in store; %s tracked data)\n'
303 304 )
304 305 % (
305 306 mcount,
306 307 mrevcount,
307 308 util.bytecount(msrcsize),
308 309 util.bytecount(mrawsize),
309 310 )
310 311 )
311 312 if progress:
312 313 progress.complete()
313 314 progress = srcrepo.ui.makeprogress(
314 315 _(b'manifest revisions'), total=mrevcount
315 316 )
316 317 for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
317 318 newrl = _perform_clone(
318 319 ui,
319 320 dstrepo,
320 321 tr,
321 322 oldrl,
322 323 rl_type,
323 324 unencoded,
324 325 upgrade_op,
325 sidedatacompanion,
326 sidedata_helpers,
326 327 oncopiedrevision,
327 328 )
328 329 info = newrl.storageinfo(storedsize=True)
329 330 mdstsize += info[b'storedsize'] or 0
330 331 ui.status(
331 332 _(
332 333 b'finished migrating %d manifest revisions across %d '
333 334 b'manifests; change in size: %s\n'
334 335 )
335 336 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
336 337 )
337 338
338 339 # Migrating changelog
339 340 ui.status(
340 341 _(
341 342 b'migrating changelog containing %d revisions '
342 343 b'(%s in store; %s tracked data)\n'
343 344 )
344 345 % (
345 346 crevcount,
346 347 util.bytecount(csrcsize),
347 348 util.bytecount(crawsize),
348 349 )
349 350 )
350 351 if progress:
351 352 progress.complete()
352 353 progress = srcrepo.ui.makeprogress(
353 354 _(b'changelog revisions'), total=crevcount
354 355 )
355 356 for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
356 357 newrl = _perform_clone(
357 358 ui,
358 359 dstrepo,
359 360 tr,
360 361 oldrl,
361 362 rl_type,
362 363 unencoded,
363 364 upgrade_op,
364 sidedatacompanion,
365 sidedata_helpers,
365 366 oncopiedrevision,
366 367 )
367 368 info = newrl.storageinfo(storedsize=True)
368 369 cdstsize += info[b'storedsize'] or 0
369 370 progress.complete()
370 371 ui.status(
371 372 _(
372 373 b'finished migrating %d changelog revisions; change in size: '
373 374 b'%s\n'
374 375 )
375 376 % (crevcount, util.bytecount(cdstsize - csrcsize))
376 377 )
377 378
378 379 dstsize = fdstsize + mdstsize + cdstsize
379 380 ui.status(
380 381 _(
381 382 b'finished migrating %d total revisions; total change in store '
382 383 b'size: %s\n'
383 384 )
384 385 % (revcount, util.bytecount(dstsize - srcsize))
385 386 )
386 387
387 388
388 389 def _files_to_copy_post_revlog_clone(srcrepo):
389 390 """yields files which should be copied to destination after revlogs
390 391 are cloned"""
391 392 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
392 393 # don't copy revlogs as they are already cloned
393 394 if store.revlog_type(path) is not None:
394 395 continue
395 396 # Skip transaction related files.
396 397 if path.startswith(b'undo'):
397 398 continue
398 399 # Only copy regular files.
399 400 if kind != stat.S_IFREG:
400 401 continue
401 402 # Skip other skipped files.
402 403 if path in (b'lock', b'fncache'):
403 404 continue
404 405 # TODO: should we skip cache too?
405 406
406 407 yield path
407 408
408 409
409 410 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
410 411 """Replace the stores after current repository is upgraded
411 412
412 413 Creates a backup of current repository store at backup path
413 414 Replaces upgraded store files in current repo from upgraded one
414 415
415 416 Arguments:
416 417 currentrepo: repo object of current repository
417 418 upgradedrepo: repo object of the upgraded data
418 419 backupvfs: vfs object for the backup path
419 420 upgrade_op: upgrade operation object
420 421 to be used to decide what all is upgraded
421 422 """
422 423 # TODO: don't blindly rename everything in store
423 424 # There can be upgrades where store is not touched at all
424 425 if upgrade_op.backup_store:
425 426 util.rename(currentrepo.spath, backupvfs.join(b'store'))
426 427 else:
427 428 currentrepo.vfs.rmtree(b'store', forcibly=True)
428 429 util.rename(upgradedrepo.spath, currentrepo.spath)
429 430
430 431
431 432 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
432 433 """Hook point for extensions to perform additional actions during upgrade.
433 434
434 435 This function is called after revlogs and store files have been copied but
435 436 before the new store is swapped into the original location.
436 437 """
437 438
438 439
439 440 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
440 441 """Do the low-level work of upgrading a repository.
441 442
442 443 The upgrade is effectively performed as a copy between a source
443 444 repository and a temporary destination repository.
444 445
445 446 The source repository is unmodified for as long as possible so the
446 447 upgrade can abort at any time without causing loss of service for
447 448 readers and without corrupting the source repository.
448 449 """
449 450 assert srcrepo.currentwlock()
450 451 assert dstrepo.currentwlock()
451 452 backuppath = None
452 453 backupvfs = None
453 454
454 455 ui.status(
455 456 _(
456 457 b'(it is safe to interrupt this process any time before '
457 458 b'data migration completes)\n'
458 459 )
459 460 )
460 461
461 462 if upgrade_op.requirements_only:
462 463 ui.status(_(b'upgrading repository requirements\n'))
463 464 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
464 465 # if there is only one action and that is persistent nodemap upgrade
465 466 # directly write the nodemap file and update requirements instead of going
466 467 # through the whole cloning process
467 468 elif (
468 469 len(upgrade_op.upgrade_actions) == 1
469 470 and b'persistent-nodemap' in upgrade_op._upgrade_actions_names
470 471 and not upgrade_op.removed_actions
471 472 ):
472 473 ui.status(
473 474 _(b'upgrading repository to use persistent nodemap feature\n')
474 475 )
475 476 with srcrepo.transaction(b'upgrade') as tr:
476 477 unfi = srcrepo.unfiltered()
477 478 cl = unfi.changelog
478 479 nodemap.persist_nodemap(tr, cl, force=True)
479 480 # we want to directly operate on the underlying revlog to force
480 481 # create a nodemap file. This is fine since this is upgrade code
481 482 # and it heavily relies on repository being revlog based
482 483 # hence accessing private attributes can be justified
483 484 nodemap.persist_nodemap(
484 485 tr, unfi.manifestlog._rootstore._revlog, force=True
485 486 )
486 487 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
487 488 elif (
488 489 len(upgrade_op.removed_actions) == 1
489 490 and [
490 491 x
491 492 for x in upgrade_op.removed_actions
492 493 if x.name == b'persistent-nodemap'
493 494 ]
494 495 and not upgrade_op.upgrade_actions
495 496 ):
496 497 ui.status(
497 498 _(b'downgrading repository to not use persistent nodemap feature\n')
498 499 )
499 500 with srcrepo.transaction(b'upgrade') as tr:
500 501 unfi = srcrepo.unfiltered()
501 502 cl = unfi.changelog
502 503 nodemap.delete_nodemap(tr, srcrepo, cl)
503 504 # check comment 20 lines above for accessing private attributes
504 505 nodemap.delete_nodemap(
505 506 tr, srcrepo, unfi.manifestlog._rootstore._revlog
506 507 )
507 508 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
508 509 else:
509 510 with dstrepo.transaction(b'upgrade') as tr:
510 511 _clonerevlogs(
511 512 ui,
512 513 srcrepo,
513 514 dstrepo,
514 515 tr,
515 516 upgrade_op,
516 517 )
517 518
518 519 # Now copy other files in the store directory.
519 520 for p in _files_to_copy_post_revlog_clone(srcrepo):
520 521 srcrepo.ui.status(_(b'copying %s\n') % p)
521 522 src = srcrepo.store.rawvfs.join(p)
522 523 dst = dstrepo.store.rawvfs.join(p)
523 524 util.copyfile(src, dst, copystat=True)
524 525
525 526 finishdatamigration(ui, srcrepo, dstrepo, requirements)
526 527
527 528 ui.status(_(b'data fully upgraded in a temporary repository\n'))
528 529
529 530 if upgrade_op.backup_store:
530 531 backuppath = pycompat.mkdtemp(
531 532 prefix=b'upgradebackup.', dir=srcrepo.path
532 533 )
533 534 backupvfs = vfsmod.vfs(backuppath)
534 535
535 536 # Make a backup of requires file first, as it is the first to be modified.
536 537 util.copyfile(
537 538 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
538 539 )
539 540
540 541 # We install an arbitrary requirement that clients must not support
541 542 # as a mechanism to lock out new clients during the data swap. This is
542 543 # better than allowing a client to continue while the repository is in
543 544 # an inconsistent state.
544 545 ui.status(
545 546 _(
546 547 b'marking source repository as being upgraded; clients will be '
547 548 b'unable to read from repository\n'
548 549 )
549 550 )
550 551 scmutil.writereporequirements(
551 552 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
552 553 )
553 554
554 555 ui.status(_(b'starting in-place swap of repository data\n'))
555 556 if upgrade_op.backup_store:
556 557 ui.status(
557 558 _(b'replaced files will be backed up at %s\n') % backuppath
558 559 )
559 560
560 561 # Now swap in the new store directory. Doing it as a rename should make
561 562 # the operation nearly instantaneous and atomic (at least in well-behaved
562 563 # environments).
563 564 ui.status(_(b'replacing store...\n'))
564 565 tstart = util.timer()
565 566 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
566 567 elapsed = util.timer() - tstart
567 568 ui.status(
568 569 _(
569 570 b'store replacement complete; repository was inconsistent for '
570 571 b'%0.1fs\n'
571 572 )
572 573 % elapsed
573 574 )
574 575
575 576 # We first write the requirements file. Any new requirements will lock
576 577 # out legacy clients.
577 578 ui.status(
578 579 _(
579 580 b'finalizing requirements file and making repository readable '
580 581 b'again\n'
581 582 )
582 583 )
583 584 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
584 585
585 586 if upgrade_op.backup_store:
586 587 # The lock file from the old store won't be removed because nothing has a
587 588 # reference to its new location. So clean it up manually. Alternatively, we
588 589 # could update srcrepo.svfs and other variables to point to the new
589 590 # location. This is simpler.
590 591 assert backupvfs is not None # help pytype
591 592 backupvfs.unlink(b'store/lock')
592 593
593 594 return backuppath
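
Taken together, this file now replaces the old per-revlog ``sidedatacompanion`` callback with a single set of sidedata helpers computed once per upgrade and threaded down to every clone. A condensed sketch of that flow, assuming ``srcrepo``, ``dstrepo``, ``tr``, ``upgrade_op``, ``old_revlog``, ``newrl`` and ``oncopiedrevision`` as set up in ``_clonerevlogs`` above:

    # compute the helpers once for the whole upgrade ...
    sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)

    # ... then hand them to every revlog clone performed by the upgrade
    old_revlog.clone(
        tr,
        newrl,
        addrevisioncb=oncopiedrevision,
        deltareuse=upgrade_op.delta_reuse_mode,
        forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
        sidedata_helpers=sidedata_helpers,
    )
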
@@ -1,507 +1,503 b''
1 1 #testcases extra sidedata
2 2
3 3 #if extra
4 4 $ cat >> $HGRCPATH << EOF
5 5 > [experimental]
6 6 > copies.write-to=changeset-only
7 7 > copies.read-from=changeset-only
8 8 > [alias]
9 9 > changesetcopies = log -r . -T 'files: {files}
10 10 > {extras % "{ifcontains("files", key, "{key}: {value}\n")}"}
11 11 > {extras % "{ifcontains("copies", key, "{key}: {value}\n")}"}'
12 12 > EOF
13 13 #endif
14 14
15 15 #if sidedata
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [format]
18 18 > exp-use-copies-side-data-changeset = yes
19 19 > EOF
20 20 #endif
21 21
22 22 $ cat >> $HGRCPATH << EOF
23 23 > [alias]
24 24 > showcopies = log -r . -T '{file_copies % "{source} -> {name}\n"}'
25 25 > [extensions]
26 26 > rebase =
27 27 > split =
28 28 > EOF
29 29
30 30 Check that copies are recorded correctly
31 31
32 32 $ hg init repo
33 33 $ cd repo
34 34 #if sidedata
35 35 $ hg debugformat -v
36 36 format-variant repo config default
37 37 fncache: yes yes yes
38 38 dotencode: yes yes yes
39 39 generaldelta: yes yes yes
40 40 share-safe: no no no
41 41 sparserevlog: yes yes yes
42 42 persistent-nodemap: no no no (no-rust !)
43 43 persistent-nodemap: yes yes no (rust !)
44 44 copies-sdc: yes yes no
45 45 revlog-v2: yes yes no
46 46 plain-cl-delta: yes yes yes
47 47 compression: zlib zlib zlib (no-zstd !)
48 48 compression: zstd zstd zstd (zstd !)
49 49 compression-level: default default default
50 50 #else
51 51 $ hg debugformat -v
52 52 format-variant repo config default
53 53 fncache: yes yes yes
54 54 dotencode: yes yes yes
55 55 generaldelta: yes yes yes
56 56 share-safe: no no no
57 57 sparserevlog: yes yes yes
58 58 persistent-nodemap: no no no (no-rust !)
59 59 persistent-nodemap: yes yes no (rust !)
60 60 copies-sdc: no no no
61 61 revlog-v2: no no no
62 62 plain-cl-delta: yes yes yes
63 63 compression: zlib zlib zlib (no-zstd !)
64 64 compression: zstd zstd zstd (zstd !)
65 65 compression-level: default default default
66 66 #endif
67 67 $ echo a > a
68 68 $ hg add a
69 69 $ hg ci -m initial
70 70 $ hg cp a b
71 71 $ hg cp a c
72 72 $ hg cp a d
73 73 $ hg ci -m 'copy a to b, c, and d'
74 74
75 75 #if extra
76 76
77 77 $ hg changesetcopies
78 78 files: b c d
79 79 filesadded: 0
80 80 1
81 81 2
82 82
83 83 p1copies: 0\x00a (esc)
84 84 1\x00a (esc)
85 85 2\x00a (esc)
86 86 #else
87 87 $ hg debugsidedata -c -v -- -1
88 88 1 sidedata entries
89 89 entry-0014 size 44
90 90 '\x00\x00\x00\x04\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00abcd'
91 91 #endif
92 92
93 93 $ hg showcopies
94 94 a -> b
95 95 a -> c
96 96 a -> d
97 97
98 98 #if extra
99 99
100 100 $ hg showcopies --config experimental.copies.read-from=compatibility
101 101 a -> b
102 102 a -> c
103 103 a -> d
104 104 $ hg showcopies --config experimental.copies.read-from=filelog-only
105 105
106 106 #endif
107 107
108 108 Check that renames are recorded correctly
109 109
110 110 $ hg mv b b2
111 111 $ hg ci -m 'rename b to b2'
112 112
113 113 #if extra
114 114
115 115 $ hg changesetcopies
116 116 files: b b2
117 117 filesadded: 1
118 118 filesremoved: 0
119 119
120 120 p1copies: 1\x00b (esc)
121 121
122 122 #else
123 123 $ hg debugsidedata -c -v -- -1
124 124 1 sidedata entries
125 125 entry-0014 size 25
126 126 '\x00\x00\x00\x02\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00bb2'
127 127 #endif
128 128
129 129 $ hg showcopies
130 130 b -> b2
131 131
132 132
133 133 Rename onto existing file. This should get recorded in the changeset files list and in the extras,
134 134 even though there is no filelog entry.
135 135
136 136 $ hg cp b2 c --force
137 137 $ hg st --copies
138 138 M c
139 139 b2
140 140
141 141 #if extra
142 142
143 143 $ hg debugindex c
144 144 rev linkrev nodeid p1 p2
145 145 0 1 b789fdd96dc2 000000000000 000000000000
146 146
147 147 #else
148 148
149 149 $ hg debugindex c
150 150 rev linkrev nodeid p1 p2
151 151 0 1 37d9b5d994ea 000000000000 000000000000
152 152
153 153 #endif
154 154
155 155
156 156 $ hg ci -m 'move b onto d'
157 157
158 158 #if extra
159 159
160 160 $ hg changesetcopies
161 161 files: c
162 162
163 163 p1copies: 0\x00b2 (esc)
164 164
165 165 #else
166 166 $ hg debugsidedata -c -v -- -1
167 167 1 sidedata entries
168 168 entry-0014 size 25
169 169 '\x00\x00\x00\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x00b2c'
170 170 #endif
171 171
172 172 $ hg showcopies
173 173 b2 -> c
174 174
175 175 #if extra
176 176
177 177 $ hg debugindex c
178 178 rev linkrev nodeid p1 p2
179 179 0 1 b789fdd96dc2 000000000000 000000000000
180 180
181 181 #else
182 182
183 183 $ hg debugindex c
184 184 rev linkrev nodeid p1 p2
185 185 0 1 37d9b5d994ea 000000000000 000000000000
186 186 1 3 029625640347 000000000000 000000000000
187 187
188 188 #endif
189 189
190 190 Create a merge commit with copying done during merge.
191 191
192 192 $ hg co 0
193 193 0 files updated, 0 files merged, 3 files removed, 0 files unresolved
194 194 $ hg cp a e
195 195 $ hg cp a f
196 196 $ hg ci -m 'copy a to e and f'
197 197 created new head
198 198 $ hg merge 3
199 199 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
200 200 (branch merge, don't forget to commit)
201 201 File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
202 202 always record it as being from p1
203 203 $ hg cp a g
204 204 File 'd' exists only in p2, so 'h' should be from p2
205 205 $ hg cp d h
206 206 File 'f' exists only in p1, so 'i' should be from p1
207 207 $ hg cp f i
208 208 $ hg ci -m 'merge'
209 209
210 210 #if extra
211 211
212 212 $ hg changesetcopies
213 213 files: g h i
214 214 filesadded: 0
215 215 1
216 216 2
217 217
218 218 p1copies: 0\x00a (esc)
219 219 2\x00f (esc)
220 220 p2copies: 1\x00d (esc)
221 221
222 222 #else
223 223 $ hg debugsidedata -c -v -- -1
224 224 1 sidedata entries
225 225 entry-0014 size 64
226 226 '\x00\x00\x00\x06\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00\x07\x00\x00\x00\x05\x00\x00\x00\x01\x06\x00\x00\x00\x06\x00\x00\x00\x02adfghi'
227 227 #endif
228 228
229 229 $ hg showcopies
230 230 a -> g
231 231 d -> h
232 232 f -> i
233 233
234 234 Test writing to both changeset and filelog
235 235
236 236 $ hg cp a j
237 237 #if extra
238 238 $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
239 239 $ hg changesetcopies
240 240 files: j
241 241 filesadded: 0
242 242 filesremoved:
243 243
244 244 p1copies: 0\x00a (esc)
245 245 p2copies:
246 246 #else
247 247 $ hg ci -m 'copy a to j'
248 248 $ hg debugsidedata -c -v -- -1
249 249 1 sidedata entries
250 250 entry-0014 size 24
251 251 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
252 252 #endif
253 253 $ hg debugdata j 0
254 254 \x01 (esc)
255 255 copy: a
256 256 copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
257 257 \x01 (esc)
258 258 a
259 259 $ hg showcopies
260 260 a -> j
261 261 $ hg showcopies --config experimental.copies.read-from=compatibility
262 262 a -> j
263 263 $ hg showcopies --config experimental.copies.read-from=filelog-only
264 264 a -> j
265 265 Existing copy information in the changeset gets removed on amend when writing
266 266 copy information only to the filelog
267 267 #if extra
268 268 $ hg ci --amend -m 'copy a to j, v2' \
269 269 > --config experimental.copies.write-to=filelog-only
270 270 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
271 271 $ hg changesetcopies
272 272 files: j
273 273
274 274 #else
275 275 $ hg ci --amend -m 'copy a to j, v2'
276 276 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
277 277 $ hg debugsidedata -c -v -- -1
278 278 1 sidedata entries
279 279 entry-0014 size 24
280 280 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
281 281 #endif
282 282 $ hg showcopies --config experimental.copies.read-from=filelog-only
283 283 a -> j
284 284 The entries should be written to extras even if they're empty (so the client
285 285 won't have to fall back to reading from filelogs)
286 286 $ echo x >> j
287 287 #if extra
288 288 $ hg ci -m 'modify j' --config experimental.copies.write-to=compatibility
289 289 $ hg changesetcopies
290 290 files: j
291 291 filesadded:
292 292 filesremoved:
293 293
294 294 p1copies:
295 295 p2copies:
296 296 #else
297 297 $ hg ci -m 'modify j'
298 298 $ hg debugsidedata -c -v -- -1
299 299 1 sidedata entries
300 300 entry-0014 size 14
301 301 '\x00\x00\x00\x01\x14\x00\x00\x00\x01\x00\x00\x00\x00j'
302 302 #endif
303 303
304 304 Test writing only to filelog
305 305
306 306 $ hg cp a k
307 307 #if extra
308 308 $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only
309 309
310 310 $ hg changesetcopies
311 311 files: k
312 312
313 313 #else
314 314 $ hg ci -m 'copy a to k'
315 315 $ hg debugsidedata -c -v -- -1
316 316 1 sidedata entries
317 317 entry-0014 size 24
318 318 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00ak'
319 319 #endif
320 320
321 321 $ hg debugdata k 0
322 322 \x01 (esc)
323 323 copy: a
324 324 copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
325 325 \x01 (esc)
326 326 a
327 327 #if extra
328 328 $ hg showcopies
329 329
330 330 $ hg showcopies --config experimental.copies.read-from=compatibility
331 331 a -> k
332 332 $ hg showcopies --config experimental.copies.read-from=filelog-only
333 333 a -> k
334 334 #else
335 335 $ hg showcopies
336 336 a -> k
337 337 #endif
338 338
339 339 $ cd ..
340 340
341 341 Test rebasing a commit with copy information
342 342
343 343 $ hg init rebase-rename
344 344 $ cd rebase-rename
345 345 $ echo a > a
346 346 $ hg ci -Aqm 'add a'
347 347 $ echo a2 > a
348 348 $ hg ci -m 'modify a'
349 349 $ hg co -q 0
350 350 $ hg mv a b
351 351 $ hg ci -qm 'rename a to b'
352 352 Not only do we want this to run in-memory, it also should not fall back to
353 353 an on-disk merge (there are no conflicts), so we force it to run in-memory
354 354 with no fallback.
355 355 $ hg rebase -d 1 --config rebase.experimental.inmemory=yes --config devel.rebase.force-in-memory-merge=yes
356 356 rebasing 2:* tip "rename a to b" (glob)
357 357 merging a and b to b
358 358 saved backup bundle to $TESTTMP/rebase-rename/.hg/strip-backup/*-*-rebase.hg (glob)
359 359 $ hg st --change . --copies
360 360 A b
361 361 a
362 362 R a
363 363 $ cd ..
364 364
365 365 Test splitting a commit
366 366
367 367 $ hg init split
368 368 $ cd split
369 369 $ echo a > a
370 370 $ echo b > b
371 371 $ hg ci -Aqm 'add a and b'
372 372 $ echo a2 > a
373 373 $ hg mv b c
374 374 $ hg ci -m 'modify a, move b to c'
375 375 $ hg --config ui.interactive=yes split <<EOF
376 376 > y
377 377 > y
378 378 > n
379 379 > y
380 380 > EOF
381 381 diff --git a/a b/a
382 382 1 hunks, 1 lines changed
383 383 examine changes to 'a'?
384 384 (enter ? for help) [Ynesfdaq?] y
385 385
386 386 @@ -1,1 +1,1 @@
387 387 -a
388 388 +a2
389 389 record this change to 'a'?
390 390 (enter ? for help) [Ynesfdaq?] y
391 391
392 392 diff --git a/b b/c
393 393 rename from b
394 394 rename to c
395 395 examine changes to 'b' and 'c'?
396 396 (enter ? for help) [Ynesfdaq?] n
397 397
398 398 created new head
399 399 diff --git a/b b/c
400 400 rename from b
401 401 rename to c
402 402 examine changes to 'b' and 'c'?
403 403 (enter ? for help) [Ynesfdaq?] y
404 404
405 405 saved backup bundle to $TESTTMP/split/.hg/strip-backup/*-*-split.hg (glob)
406 406 $ cd ..
407 407
408 408 Test committing half a rename
409 409
410 410 $ hg init partial
411 411 $ cd partial
412 412 $ echo a > a
413 413 $ hg ci -Aqm 'add a'
414 414 $ hg mv a b
415 415 $ hg ci -m 'remove a' a
416 416
417 417 #if sidedata
418 418
419 419 Test upgrading/downgrading to sidedata storage
420 420 ==============================================
421 421
422 downgrading (keeping some sidedata)
422 downgrading
423 423
424 424 $ hg debugformat -v
425 425 format-variant repo config default
426 426 fncache: yes yes yes
427 427 dotencode: yes yes yes
428 428 generaldelta: yes yes yes
429 429 share-safe: no no no
430 430 sparserevlog: yes yes yes
431 431 persistent-nodemap: no no no (no-rust !)
432 432 persistent-nodemap: yes yes no (rust !)
433 433 copies-sdc: yes yes no
434 434 revlog-v2: yes yes no
435 435 plain-cl-delta: yes yes yes
436 436 compression: zlib zlib zlib (no-zstd !)
437 437 compression: zstd zstd zstd (zstd !)
438 438 compression-level: default default default
439 439 $ hg debugsidedata -c -- 0
440 440 1 sidedata entries
441 441 entry-0014 size 14
442 442 $ hg debugsidedata -c -- 1
443 443 1 sidedata entries
444 444 entry-0014 size 14
445 445 $ hg debugsidedata -m -- 0
446 446 $ cat << EOF > .hg/hgrc
447 447 > [format]
448 448 > exp-use-side-data = yes
449 449 > exp-use-copies-side-data-changeset = no
450 450 > EOF
451 451 $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
452 452 $ hg debugformat -v
453 453 format-variant repo config default
454 454 fncache: yes yes yes
455 455 dotencode: yes yes yes
456 456 generaldelta: yes yes yes
457 457 share-safe: no no no
458 458 sparserevlog: yes yes yes
459 459 persistent-nodemap: no no no (no-rust !)
460 460 persistent-nodemap: yes yes no (rust !)
461 461 copies-sdc: no no no
462 462 revlog-v2: yes yes no
463 463 plain-cl-delta: yes yes yes
464 464 compression: zlib zlib zlib (no-zstd !)
465 465 compression: zstd zstd zstd (zstd !)
466 466 compression-level: default default default
467 467 $ hg debugsidedata -c -- 0
468 1 sidedata entries
469 entry-0014 size 14
470 468 $ hg debugsidedata -c -- 1
471 1 sidedata entries
472 entry-0014 size 14
473 469 $ hg debugsidedata -m -- 0
474 470
475 471 upgrading
476 472
477 473 $ cat << EOF > .hg/hgrc
478 474 > [format]
479 475 > exp-use-copies-side-data-changeset = yes
480 476 > EOF
481 477 $ hg debugupgraderepo --run --quiet --no-backup > /dev/null
482 478 $ hg debugformat -v
483 479 format-variant repo config default
484 480 fncache: yes yes yes
485 481 dotencode: yes yes yes
486 482 generaldelta: yes yes yes
487 483 share-safe: no no no
488 484 sparserevlog: yes yes yes
489 485 persistent-nodemap: no no no (no-rust !)
490 486 persistent-nodemap: yes yes no (rust !)
491 487 copies-sdc: yes yes no
492 488 revlog-v2: yes yes no
493 489 plain-cl-delta: yes yes yes
494 490 compression: zlib zlib zlib (no-zstd !)
495 491 compression: zstd zstd zstd (zstd !)
496 492 compression-level: default default default
497 493 $ hg debugsidedata -c -- 0
498 494 1 sidedata entries
499 495 entry-0014 size 14
500 496 $ hg debugsidedata -c -- 1
501 497 1 sidedata entries
502 498 entry-0014 size 14
503 499 $ hg debugsidedata -m -- 0
504 500
505 501 #endif
506 502
507 503 $ cd ..
@@ -1,93 +1,106 b''
1 1 # ext-sidedata.py - small extension to test the sidedata logic
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import struct
12 12
13 13 from mercurial.node import nullrev
14 14 from mercurial import (
15 changegroup,
15 16 extensions,
16 17 requirements,
17 18 revlog,
18 19 )
19 20
20 21 from mercurial.upgrade_utils import engine as upgrade_engine
21 22
23 from mercurial.revlogutils import constants
22 24 from mercurial.revlogutils import sidedata
23 25
24 26
25 27 def wrapaddrevision(
26 28 orig, self, text, transaction, link, p1, p2, *args, **kwargs
27 29 ):
28 30 if kwargs.get('sidedata') is None:
29 31 kwargs['sidedata'] = {}
30 32 sd = kwargs['sidedata']
31 33 ## let's store some arbitrary data just for testing
32 34 # text length
33 35 sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
34 36 # and sha2 hashes
35 37 sha256 = hashlib.sha256(text).digest()
36 38 sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
37 39 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
38 40
39 41
40 42 def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
41 43 text, sd = orig(self, nodeorrev, *args, **kwargs)
42 44 if getattr(self, 'sidedatanocheck', False):
43 45 return text, sd
44 46 if self.version & 0xFFFF != 2:
45 47 return text, sd
46 48 if nodeorrev != nullrev and nodeorrev != self.nullid:
47 49 cat1 = sd.get(sidedata.SD_TEST1)
48 50 if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
49 51 raise RuntimeError('text size mismatch')
50 52 expected = sd.get(sidedata.SD_TEST2)
51 53 got = hashlib.sha256(text).digest()
52 54 if expected is not None and got != expected:
53 55 raise RuntimeError('sha256 mismatch')
54 56 return text, sd
55 57
56 58
57 def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
58 sidedatacompanion = orig(srcrepo, dstrepo)
59 def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
60 repo, computers, removers = orig(srcrepo, dstrepo)
61 assert not computers and not removers # deal with composition later
59 62 addedreqs = dstrepo.requirements - srcrepo.requirements
63
60 64 if requirements.SIDEDATA_REQUIREMENT in addedreqs:
61 assert sidedatacompanion is None # deal with composition later
62 65
63 def sidedatacompanion(revlog, rev):
66 def computer(repo, revlog, rev, old_sidedata):
67 assert not old_sidedata # not supported yet
64 68 update = {}
65 69 revlog.sidedatanocheck = True
66 70 try:
67 71 text = revlog.revision(rev)
68 72 finally:
69 73 del revlog.sidedatanocheck
70 74 ## let's store some arbitrary data just for testing
71 75 # text length
72 76 update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
73 77 # and sha2 hashes
74 78 sha256 = hashlib.sha256(text).digest()
75 79 update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
76 return False, (), update, 0, 0
80 return update, (0, 0)
77 81
78 return sidedatacompanion
82 srcrepo.register_sidedata_computer(
83 constants.KIND_CHANGELOG,
84 b"whatever",
85 (sidedata.SD_TEST1, sidedata.SD_TEST2),
86 computer,
87 0,
88 )
89 dstrepo.register_wanted_sidedata(b"whatever")
90
91 return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
79 92
80 93
81 94 def extsetup(ui):
82 95 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
83 96 extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
84 97 extensions.wrapfunction(
85 upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
98 upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
86 99 )
87 100
88 101
89 102 def reposetup(ui, repo):
90 103 # We don't register sidedata computers because we don't care within these
91 104 # tests
92 105 repo.register_wanted_sidedata(sidedata.SD_TEST1)
93 106 repo.register_wanted_sidedata(sidedata.SD_TEST2)
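For reference, below is a minimal standalone sketch of the registration pattern the updated ext-sidedata.py above relies on. It only restates what the diff already shows (register_sidedata_computer, register_wanted_sidedata and changegroup.get_sidedata_helpers); the category name b"whatever" is the test extension's own arbitrary label, and the sidedatanocheck toggle plus the function wrapping from the real extension are omitted for brevity.

# Minimal sketch of the new sidedata-helpers registration, condensed from
# the ext-sidedata.py diff above; b"whatever" is an arbitrary test category.
import hashlib
import struct

from mercurial import changegroup
from mercurial.revlogutils import constants, sidedata


def computer(repo, revlog, rev, old_sidedata):
    # Recompute the test sidedata (text length and sha256) for one revision.
    text = revlog.revision(rev)
    update = {
        sidedata.SD_TEST1: struct.pack('>I', len(text)),
        sidedata.SD_TEST2: struct.pack('>32s', hashlib.sha256(text).digest()),
    }
    # The new API expects (new_entries, (flags_to_add, flags_to_remove)).
    return update, (0, 0)


def setup_sidedata_helpers(srcrepo, dstrepo):
    # Source side: declare a computer for changelog revisions.
    srcrepo.register_sidedata_computer(
        constants.KIND_CHANGELOG,
        b"whatever",  # arbitrary category key used by the test
        (sidedata.SD_TEST1, sidedata.SD_TEST2),
        computer,
        0,  # no revlog flag associated with this category
    )
    # Destination side: request that category to be (re)computed.
    dstrepo.register_wanted_sidedata(b"whatever")
    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)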