sidedata: register copies sidedata computer regardless of the revlog version...
Raphaël Gomès
r47840:b409cdc6 default
@@ -1,964 +1,964 b''
1 1 # coding: utf-8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from .node import nullrev
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements as requirementsmod,
19 19 util,
20 20 )
21 21
22 22 from .revlogutils import (
23 23 constants as revlogconst,
24 24 flagutil as sidedataflag,
25 25 sidedata as sidedatamod,
26 26 )
27 27
28 28
29 29 class ChangingFiles(object):
30 30 """A class recording the changes made to files by a changeset
31 31
32 32 Actions performed on files are gathered into 5 sets:
33 33
34 34 - added: files actively added in the changeset.
35 35 - merged: files whose history got merged
36 36 - removed: files removed in the revision
37 37 - salvaged: files that might have been deleted by a merge but were not
38 38 - touched: files affected by the changeset
39 39
40 40 and copy information is held by 2 mappings:
41 41
42 42 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
43 43 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
44 44
45 45 See their inline help for details.
46 46 """
47 47
48 48 def __init__(
49 49 self,
50 50 touched=None,
51 51 added=None,
52 52 removed=None,
53 53 merged=None,
54 54 salvaged=None,
55 55 p1_copies=None,
56 56 p2_copies=None,
57 57 ):
58 58 self._added = set(() if added is None else added)
59 59 self._merged = set(() if merged is None else merged)
60 60 self._removed = set(() if removed is None else removed)
61 61 self._touched = set(() if touched is None else touched)
62 62 self._salvaged = set(() if salvaged is None else salvaged)
63 63 self._touched.update(self._added)
64 64 self._touched.update(self._merged)
65 65 self._touched.update(self._removed)
66 66 self._p1_copies = dict(() if p1_copies is None else p1_copies)
67 67 self._p2_copies = dict(() if p2_copies is None else p2_copies)
68 68
69 69 def __eq__(self, other):
70 70 return (
71 71 self.added == other.added
72 72 and self.merged == other.merged
73 73 and self.removed == other.removed
74 74 and self.salvaged == other.salvaged
75 75 and self.touched == other.touched
76 76 and self.copied_from_p1 == other.copied_from_p1
77 77 and self.copied_from_p2 == other.copied_from_p2
78 78 )
79 79
80 80 @property
81 81 def has_copies_info(self):
82 82 return bool(
83 83 self.removed
84 84 or self.merged
85 85 or self.salvaged
86 86 or self.copied_from_p1
87 87 or self.copied_from_p2
88 88 )
89 89
90 90 @util.propertycache
91 91 def added(self):
92 92 """files actively added in the changeset
93 93
94 94 Any file present in that revision that was absent in all the changeset's
95 95 parents.
96 96
97 97 In case of merge, this means a file absent in one of the parents but
98 98 existing in the other will *not* be contained in this set. (They were
99 99 added by an ancestor)
100 100 """
101 101 return frozenset(self._added)
102 102
103 103 def mark_added(self, filename):
104 104 if 'added' in vars(self):
105 105 del self.added
106 106 self._added.add(filename)
107 107 self.mark_touched(filename)
108 108
109 109 def update_added(self, filenames):
110 110 for f in filenames:
111 111 self.mark_added(f)
112 112
113 113 @util.propertycache
114 114 def merged(self):
115 115 """files actively merged during a merge
116 116
117 117 Any modified file which had modifications on both sides that needed merging.
118 118
119 119 In this case a new filenode was created and it has two parents.
120 120 """
121 121 return frozenset(self._merged)
122 122
123 123 def mark_merged(self, filename):
124 124 if 'merged' in vars(self):
125 125 del self.merged
126 126 self._merged.add(filename)
127 127 self.mark_touched(filename)
128 128
129 129 def update_merged(self, filenames):
130 130 for f in filenames:
131 131 self.mark_merged(f)
132 132
133 133 @util.propertycache
134 134 def removed(self):
135 135 """files actively removed by the changeset
136 136
137 137 In case of merge this will only contain files whose removal drops "new"
138 138 content. For any file absent in the current changeset:
139 139
140 140 a) If the file exists in both parents, it is clearly "actively" removed
141 141 by this changeset.
142 142
143 143 b) If a file exists in only one parent and in none of the common
144 144 ancestors, then the file was newly added in one of the merged branches
145 145 and then got "actively" removed.
146 146
147 147 c) If a file exists in only one parent and at least one of the common
148 148 ancestors using the same filenode, then the file was unchanged on one
149 149 side and deleted on the other side. The merge "passively" propagated
150 150 that deletion, but didn't "actively" remove the file. In this case the
151 151 file is *not* included in the `removed` set.
152 152
153 153 d) If a file exists in only one parent and at least one of the common
154 154 ancestors using a different filenode, then the file was changed on one
155 155 side and removed on the other side. The merge process "actively"
156 156 decided to drop the new change and delete the file. Unlike in the
157 157 previous case, (c), the file is included in the `removed` set.
158 158
159 159 Summary table for merge:
160 160
161 161 case | exists in parents | exists in gca || removed
162 162 (a) | both | * || yes
163 163 (b) | one | none || yes
164 164 (c) | one | same filenode || no
165 165 (d) | one | new filenode || yes
166 166 """
167 167 return frozenset(self._removed)
168 168
169 169 def mark_removed(self, filename):
170 170 if 'removed' in vars(self):
171 171 del self.removed
172 172 self._removed.add(filename)
173 173 self.mark_touched(filename)
174 174
175 175 def update_removed(self, filenames):
176 176 for f in filenames:
177 177 self.mark_removed(f)
178 178
179 179 @util.propertycache
180 180 def salvaged(self):
181 181 """files that might have been deleted by a merge, but still exist.
182 182
183 183 During a merge, the manifest merging might select some files for
184 184 removal, or for a removed/changed conflict. If at commit time the file
185 185 still exists, its removal was "reverted" and the file is "salvaged"
186 186 """
187 187 return frozenset(self._salvaged)
188 188
189 189 def mark_salvaged(self, filename):
190 190 if "salvaged" in vars(self):
191 191 del self.salvaged
192 192 self._salvaged.add(filename)
193 193 self.mark_touched(filename)
194 194
195 195 def update_salvaged(self, filenames):
196 196 for f in filenames:
197 197 self.mark_salvaged(f)
198 198
199 199 @util.propertycache
200 200 def touched(self):
201 201 """files either actively modified, added or removed"""
202 202 return frozenset(self._touched)
203 203
204 204 def mark_touched(self, filename):
205 205 if 'touched' in vars(self):
206 206 del self.touched
207 207 self._touched.add(filename)
208 208
209 209 def update_touched(self, filenames):
210 210 for f in filenames:
211 211 self.mark_touched(f)
212 212
213 213 @util.propertycache
214 214 def copied_from_p1(self):
215 215 return self._p1_copies.copy()
216 216
217 217 def mark_copied_from_p1(self, source, dest):
218 218 if 'copied_from_p1' in vars(self):
219 219 del self.copied_from_p1
220 220 self._p1_copies[dest] = source
221 221
222 222 def update_copies_from_p1(self, copies):
223 223 for dest, source in copies.items():
224 224 self.mark_copied_from_p1(source, dest)
225 225
226 226 @util.propertycache
227 227 def copied_from_p2(self):
228 228 return self._p2_copies.copy()
229 229
230 230 def mark_copied_from_p2(self, source, dest):
231 231 if 'copied_from_p2' in vars(self):
232 232 del self.copied_from_p2
233 233 self._p2_copies[dest] = source
234 234
235 235 def update_copies_from_p2(self, copies):
236 236 for dest, source in copies.items():
237 237 self.mark_copied_from_p2(source, dest)
238 238
239 239
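# Editor's note: the short sketch below is illustrative only and is not part
# of the original module. It shows, on invented file names, how the
# ChangingFiles accumulator documented above is filled and read back.
def _example_changing_files_usage():
    cf = ChangingFiles()
    cf.mark_added(b'new-module.py')
    cf.mark_removed(b'obsolete.txt')
    # copies are recorded as (source, destination); the destination is also
    # marked as touched, as the real computations below do.
    cf.mark_copied_from_p1(b'old-name.py', b'new-name.py')
    cf.mark_touched(b'new-name.py')
    # every marked file ends up in `touched`; copy mappings are {dest: source}
    assert cf.touched == {b'new-module.py', b'obsolete.txt', b'new-name.py'}
    assert cf.copied_from_p1 == {b'new-name.py': b'old-name.py'}
    assert cf.has_copies_info
    return cf

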
240 240 def compute_all_files_changes(ctx):
241 241 """compute the files changed by a revision"""
242 242 p1 = ctx.p1()
243 243 p2 = ctx.p2()
244 244 if p1.rev() == nullrev and p2.rev() == nullrev:
245 245 return _process_root(ctx)
246 246 elif p1.rev() != nullrev and p2.rev() == nullrev:
247 247 return _process_linear(p1, ctx)
248 248 elif p1.rev() == nullrev and p2.rev() != nullrev:
249 249 # In the wild, one can encounter changesets where p1 is null but p2 is not
250 250 return _process_linear(p1, ctx, parent=2)
251 251 elif p1.rev() == p2.rev():
252 252 # In the wild, one can encounter such a "non-merge"
253 253 return _process_linear(p1, ctx)
254 254 else:
255 255 return _process_merge(p1, p2, ctx)
256 256
257 257
258 258 def _process_root(ctx):
259 259 """compute the appropriate changed files for a changeset with no parents"""
260 260 # Simple, there was nothing before it, so everything is added.
261 261 md = ChangingFiles()
262 262 manifest = ctx.manifest()
263 263 for filename in manifest:
264 264 md.mark_added(filename)
265 265 return md
266 266
267 267
268 268 def _process_linear(parent_ctx, children_ctx, parent=1):
269 269 """compute the appropriate changed files for a changeset with a single parent"""
270 270 md = ChangingFiles()
271 271 parent_manifest = parent_ctx.manifest()
272 272 children_manifest = children_ctx.manifest()
273 273
274 274 copies_candidate = []
275 275
276 276 for filename, d in parent_manifest.diff(children_manifest).items():
277 277 if d[1][0] is None:
278 278 # no filenode for the "new" value, file is absent
279 279 md.mark_removed(filename)
280 280 else:
281 281 copies_candidate.append(filename)
282 282 if d[0][0] is None:
283 283 # no filenode for the "old" value, file was absent
284 284 md.mark_added(filename)
285 285 else:
286 286 # filenode for both "old" and "new"
287 287 md.mark_touched(filename)
288 288
289 289 if parent == 1:
290 290 copied = md.mark_copied_from_p1
291 291 elif parent == 2:
292 292 copied = md.mark_copied_from_p2
293 293 else:
294 294 assert False, "bad parent value %d" % parent
295 295
296 296 for filename in copies_candidate:
297 297 copy_info = children_ctx[filename].renamed()
298 298 if copy_info:
299 299 source, srcnode = copy_info
300 300 copied(source, filename)
301 301
302 302 return md
303 303
304 304
305 305 def _process_merge(p1_ctx, p2_ctx, ctx):
306 306 """compute the appropriate changed files for a changeset with two parents
307 307
308 308 This is a more advanced case. The information we need to record is summarised
309 309 in the following table:
310 310
311 311 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
312 312 │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │
313 313 │ p2 ╲ p1 │ │ │ │ │
314 314 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
315 315 │ │ │🄱 No Changes │🄳 No Changes │ │
316 316 │ ø │🄰 No Changes │ OR │ OR │🄵 No Changes │
317 317 │ │ │🄲 Deleted[1] │🄴 Salvaged[2]│ [3] │
318 318 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
319 319 │ │🄶 No Changes │ │ │ │
320 320 │ (Some, None) │ OR │🄻 Deleted │ ø │ ø │
321 321 │ │🄷 Deleted[1] │ │ │ │
322 322 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
323 323 │ │🄸 No Changes │ │ │ 🄽 Touched │
324 324 │ (None, Some) │ OR │ ø │🄼 Added │OR 🅀 Salvaged │
325 325 │ │🄹 Salvaged[2]│ │ (copied?) │ (copied?) │
326 326 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
327 327 │ │ │ │ 🄾 Touched │ 🄿 Merged │
328 328 │ (Some, Some) │🄺 No Changes │ ø │OR 🅁 Salvaged │OR 🅂 Touched │
329 329 │ │ [3] │ │ (copied?) │ (copied?) │
330 330 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
331 331
332 332 Special case [1]:
333 333
334 334 The situation is:
335 335 - parent-A: file exists,
336 336 - parent-B: no file,
337 337 - working-copy: no file.
338 338
339 339 Detecting a "deletion" will depend on the presence of actual change on
340 340 the "parent-A" branch:
341 341
342 342 Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
343 343 compared to the merge ancestors, then parent-A branch left the file
344 344 untouched while parent-B deleted it. We simply apply the change from
345 345 the "parent-B" branch: the file was automatically dropped.
346 346 The result is:
347 347 - file is not recorded as touched by the merge.
348 348
349 349 Subcase 🄲 or 🄷 : otherwise, the change from the parent-A branch was explicitly dropped and
350 350 the file was "deleted again". From a user perspective, the message
351 351 about "locally changed" while "remotely deleted" (or the other way
352 352 around) was issued and the user chose to delete the file.
353 353 The result:
354 354 - file is recorded as touched by the merge.
355 355
356 356
357 357 Special case [2]:
358 358
359 359 The situation is:
360 360 - parent-A: no file,
361 361 - parent-B: file,
362 362 - working-copy: file (same content as parent-B).
363 363
364 364 There are three subcases depending on the ancestors' contents:
365 365
366 366 - A) the file is missing in all ancestors,
367 367 - B) at least one ancestor has the file with filenode ≠ from parent-B,
368 368 - C) all ancestors use the same filenode as parent-B,
369 369
370 370 Subcase (A) is the simplest: nothing happened on the parent-A side while
371 371 parent-B added it.
372 372
373 373 The result:
374 374 - the file is not marked as touched by the merge.
375 375
376 376 Subcase (B) is the counterpart of "Special case [1]": the file was
377 377 modified on parent-B side, while parent-A side deleted it. However this
378 378 time, the conflict was solved by keeping the file (and its
379 379 modification). We consider the file as "salvaged".
380 380
381 381 The result:
382 382 - the file is marked as "salvaged" by the merge.
383 383
384 384 Subcase (C) is a subtle variation of the case above. In this case, the
385 385 file is unchanged on the parent-B side and actively removed on the
386 386 parent-A side. So the merge machinery correctly decides it should be
387 387 removed. However, the file was explicitly restored to its parent-B
388 388 content before the merge was committed. The file is marked
389 389 as salvaged too. From the merge result perspective, this is similar to
390 390 Subcase (B); however, from the merge resolution perspective they differ
391 391 since in (C) there was no conflict, just an obvious solution to the
392 392 merge (that got reverted).
393 393
394 394 Special case [3]:
395 395
396 396 The situation is:
397 397 - parent-A: file,
398 398 - parent-B: file (different filenode from parent-A),
399 399 - working-copy: file (same filenode as parent-B).
400 400
401 401 This case is in theory much simpler: for this to happen, it means the
402 402 filenode in parent-A is purely replacing the one in parent-B (either a
403 403 descendant, or a full new file history, see changeset). So the merge
404 404 introduces no changes, and the file is not affected by the merge...
405 405
406 406 However, in the wild it is possible to find commits where the above is
407 407 not true. For example, repositories have some commits where the *new* node
408 408 is an ancestor of the node in parent-A, or where parent-A and parent-B are
409 409 two branches of the same file history, yet no merge filenode was created
410 410 (while the "merge" should have led to a "modification").
411 411
412 412 Detecting such cases (and not recording the file as modified) would be a
413 413 nice bonus. However, we do not do any of this yet.
414 414 """
415 415
416 416 repo = ctx.repo()
417 417 md = ChangingFiles()
418 418
419 419 m = ctx.manifest()
420 420 p1m = p1_ctx.manifest()
421 421 p2m = p2_ctx.manifest()
422 422 diff_p1 = p1m.diff(m)
423 423 diff_p2 = p2m.diff(m)
424 424
425 425 cahs = ctx.repo().changelog.commonancestorsheads(
426 426 p1_ctx.node(), p2_ctx.node()
427 427 )
428 428 if not cahs:
429 429 cahs = [nullrev]
430 430 mas = [ctx.repo()[r].manifest() for r in cahs]
431 431
432 432 copy_candidates = []
433 433
434 434 # Dealing with case 🄰 happens automatically. Since there are no entries in
435 435 # diff_p1 nor diff_p2, we will never iterate over such files.
436 436
437 437 # Iteration over diff_p1 content will deal with all cases except those in the
438 438 # first column of the table.
439 439 for filename, d1 in diff_p1.items():
440 440
441 441 d2 = diff_p2.pop(filename, None)
442 442
443 443 if d2 is None:
444 444 # this deals with the first line of the table.
445 445 _process_other_unchanged(md, mas, filename, d1)
446 446 else:
447 447
448 448 if d1[0][0] is None and d2[0][0] is None:
449 449 # case 🄼 — the file is absent in both parents, so it was added.
450 450 md.mark_added(filename)
451 451 copy_candidates.append(filename)
452 452 elif d1[1][0] is None and d2[1][0] is None:
453 453 # case 🄻 — both deleted the file.
454 454 md.mark_removed(filename)
455 455 elif d1[1][0] is not None and d2[1][0] is not None:
456 456 if d1[0][0] is None or d2[0][0] is None:
457 457 if any(_find(ma, filename) is not None for ma in mas):
458 458 # case 🅀 or 🅁
459 459 md.mark_salvaged(filename)
460 460 else:
461 461 # case 🄽 or 🄾 : touched
462 462 md.mark_touched(filename)
463 463 else:
464 464 fctx = repo.filectx(filename, fileid=d1[1][0])
465 465 if fctx.p2().rev() == nullrev:
466 466 # case 🅂
467 467 # let's assume we can trust the file history. If the
468 468 # filenode is not a merge, the file was not merged.
469 469 md.mark_touched(filename)
470 470 else:
471 471 # case 🄿
472 472 md.mark_merged(filename)
473 473 copy_candidates.append(filename)
474 474 else:
475 475 # Impossible case, the post-merge file status cannot be None on
476 476 # one side and Something on the other side.
477 477 assert False, "unreachable"
478 478
479 479 # Iteration over the remaining diff_p2 content deals with the first column of the
480 480 # table.
481 481 for filename, d2 in diff_p2.items():
482 482 _process_other_unchanged(md, mas, filename, d2)
483 483
484 484 for filename in copy_candidates:
485 485 copy_info = ctx[filename].renamed()
486 486 if copy_info:
487 487 source, srcnode = copy_info
488 488 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
489 489 md.mark_copied_from_p1(source, filename)
490 490 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
491 491 md.mark_copied_from_p2(source, filename)
492 492 return md
493 493
494 494
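# Editor's note: illustrative sketch, not part of the original module. It
# spells out the shape of the manifest.diff() entries that drive the
# table-based logic in _process_merge above: each entry is a pair
# ((old_filenode, old_flags), (new_filenode, new_flags)), where a None
# filenode means the file is absent on that side. Filenodes are invented.
def _example_merge_diff_shapes():
    node_a, node_b = b'\x11' * 20, b'\x22' * 20
    # case 🄼: absent in both parents, present in the merge result -> added
    d1 = ((None, b''), (node_b, b''))
    d2 = ((None, b''), (node_b, b''))
    assert d1[0][0] is None and d2[0][0] is None
    # case 🄻: present in both parents, absent in the result -> removed
    d1 = ((node_a, b''), (None, b''))
    d2 = ((node_b, b''), (None, b''))
    assert d1[1][0] is None and d2[1][0] is None

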
495 495 def _find(manifest, filename):
496 496 """return the associated filenode or None"""
497 497 if filename not in manifest:
498 498 return None
499 499 return manifest.find(filename)[0]
500 500
501 501
502 502 def _process_other_unchanged(md, mas, filename, diff):
503 503 source_node = diff[0][0]
504 504 target_node = diff[1][0]
505 505
506 506 if source_node is not None and target_node is None:
507 507 if any(not _find(ma, filename) == source_node for ma in mas):
508 508 # case 🄲 or 🄷
509 509 md.mark_removed(filename)
510 510 # else, we have case 🄱 or 🄶 : no change needs to be recorded
511 511 elif source_node is None and target_node is not None:
512 512 if any(_find(ma, filename) is not None for ma in mas):
513 513 # case 🄴 or 🄹
514 514 md.mark_salvaged(filename)
515 515 # else, we have case 🄳 or 🄸 : simple merge without intervention
516 516 elif source_node is not None and target_node is not None:
517 517 # case 🄵 or 🄺 : simple merge without intervention
518 518 #
519 519 # In the buggy case where source_node is not an ancestor of target_node,
520 520 # there should have been a new filenode created, recording this as
521 521 # "modified". We do not deal with that yet.
522 522 pass
523 523 else:
524 524 # An impossible case, the diff algorithm should not return an entry if the
525 525 # file is missing on both sides.
526 526 assert False, "unreachable"
527 527
528 528
529 529 def _missing_from_all_ancestors(mas, filename):
530 530 return all(_find(ma, filename) is None for ma in mas)
531 531
532 532
533 533 def computechangesetfilesadded(ctx):
534 534 """return the list of files added in a changeset"""
535 535 added = []
536 536 for f in ctx.files():
537 537 if not any(f in p for p in ctx.parents()):
538 538 added.append(f)
539 539 return added
540 540
541 541
542 542 def get_removal_filter(ctx, x=None):
543 543 """return a function to detect files "wrongly" detected as `removed`
544 544
545 545 When a file is removed relative to p1 in a merge, this
546 546 function determines whether the absence is due to a
547 547 deletion from a parent, or whether the merge commit
548 548 itself deletes the file. We decide this by doing a
549 549 simplified three way merge of the manifest entry for
550 550 the file. There are two ways we decide the merge
551 551 itself didn't delete a file:
552 552 - neither parent (nor the merge) contain the file
553 553 - exactly one parent contains the file, and that
554 554 parent has the same filelog entry as the merge
555 555 ancestor (or all of them if there are two). In other
556 556 words, that parent left the file unchanged while the
557 557 other one deleted it.
558 558 One way to think about this is that deleting a file is
559 559 similar to emptying it, so the list of changed files
560 560 should be similar either way. The computation
561 561 described above is not done directly in _filecommit
562 562 when creating the list of changed files, however
563 563 it does something very similar by comparing filelog
564 564 nodes.
565 565 """
566 566
567 567 if x is not None:
568 568 p1, p2, m1, m2 = x
569 569 else:
570 570 p1 = ctx.p1()
571 571 p2 = ctx.p2()
572 572 m1 = p1.manifest()
573 573 m2 = p2.manifest()
574 574
575 575 @util.cachefunc
576 576 def mas():
577 577 p1n = p1.node()
578 578 p2n = p2.node()
579 579 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
580 580 if not cahs:
581 581 cahs = [nullrev]
582 582 return [ctx.repo()[r].manifest() for r in cahs]
583 583
584 584 def deletionfromparent(f):
585 585 if f in m1:
586 586 return f not in m2 and all(
587 587 f in ma and ma.find(f) == m1.find(f) for ma in mas()
588 588 )
589 589 elif f in m2:
590 590 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
591 591 else:
592 592 return True
593 593
594 594 return deletionfromparent
595 595
596 596
597 597 def computechangesetfilesremoved(ctx):
598 598 """return the list of files removed in a changeset"""
599 599 removed = []
600 600 for f in ctx.files():
601 601 if f not in ctx:
602 602 removed.append(f)
603 603 if removed:
604 604 rf = get_removal_filter(ctx)
605 605 removed = [r for r in removed if not rf(r)]
606 606 return removed
607 607
608 608
609 609 def computechangesetfilesmerged(ctx):
610 610 """return the list of files merged in a changeset"""
611 611 merged = []
612 612 if len(ctx.parents()) < 2:
613 613 return merged
614 614 for f in ctx.files():
615 615 if f in ctx:
616 616 fctx = ctx[f]
617 617 parents = fctx._filelog.parents(fctx._filenode)
618 618 if parents[1] != ctx.repo().nullid:
619 619 merged.append(f)
620 620 return merged
621 621
622 622
623 623 def computechangesetcopies(ctx):
624 624 """return the copies data for a changeset
625 625
626 626 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
627 627
628 628 Each dictionary is in the form: `{newname: oldname}`
629 629 """
630 630 p1copies = {}
631 631 p2copies = {}
632 632 p1 = ctx.p1()
633 633 p2 = ctx.p2()
634 634 narrowmatch = ctx._repo.narrowmatch()
635 635 for dst in ctx.files():
636 636 if not narrowmatch(dst) or dst not in ctx:
637 637 continue
638 638 copied = ctx[dst].renamed()
639 639 if not copied:
640 640 continue
641 641 src, srcnode = copied
642 642 if src in p1 and p1[src].filenode() == srcnode:
643 643 p1copies[dst] = src
644 644 elif src in p2 and p2[src].filenode() == srcnode:
645 645 p2copies[dst] = src
646 646 return p1copies, p2copies
647 647
648 648
649 649 def encodecopies(files, copies):
650 650 items = []
651 651 for i, dst in enumerate(files):
652 652 if dst in copies:
653 653 items.append(b'%d\0%s' % (i, copies[dst]))
654 654 if len(items) != len(copies):
655 655 raise error.ProgrammingError(
656 656 b'some copy targets missing from file list'
657 657 )
658 658 return b"\n".join(items)
659 659
660 660
661 661 def decodecopies(files, data):
662 662 try:
663 663 copies = {}
664 664 if not data:
665 665 return copies
666 666 for l in data.split(b'\n'):
667 667 strindex, src = l.split(b'\0')
668 668 i = int(strindex)
669 669 dst = files[i]
670 670 copies[dst] = src
671 671 return copies
672 672 except (ValueError, IndexError):
673 673 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
674 674 # used different syntax for the value.
675 675 return None
676 676
677 677
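# Editor's note: illustrative sketch, not part of the original module. It
# shows the format produced by encodecopies()/decodecopies() above on
# invented file names: one "<index>\0<source>" item per copied file, items
# joined by newlines, where <index> points into the changeset's file list.
def _example_copies_encoding():
    files = [b'a.txt', b'b.txt', b'c.txt']
    copies = {b'b.txt': b'a.txt'}  # b.txt was copied from a.txt
    data = encodecopies(files, copies)
    assert data == b'1\x00a.txt'  # index of b.txt in `files`, NUL, source
    assert decodecopies(files, data) == copies
    return data

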
678 678 def encodefileindices(files, subset):
679 679 subset = set(subset)
680 680 indices = []
681 681 for i, f in enumerate(files):
682 682 if f in subset:
683 683 indices.append(b'%d' % i)
684 684 return b'\n'.join(indices)
685 685
686 686
687 687 def decodefileindices(files, data):
688 688 try:
689 689 subset = []
690 690 if not data:
691 691 return subset
692 692 for strindex in data.split(b'\n'):
693 693 i = int(strindex)
694 694 if i < 0 or i >= len(files):
695 695 return None
696 696 subset.append(files[i])
697 697 return subset
698 698 except (ValueError, IndexError):
699 699 # Perhaps someone had chosen the same key name (e.g. "added") and
700 700 # used different syntax for the value.
701 701 return None
702 702
703 703
704 704 # see mercurial/helptext/internals/revlogs.txt for details about the format
705 705
706 706 ACTION_MASK = int("111" "00", 2)
707 707 # note: an untouched file used as a copy source will appear as `000` for this mask.
708 708 ADDED_FLAG = int("001" "00", 2)
709 709 MERGED_FLAG = int("010" "00", 2)
710 710 REMOVED_FLAG = int("011" "00", 2)
711 711 SALVAGED_FLAG = int("100" "00", 2)
712 712 TOUCHED_FLAG = int("101" "00", 2)
713 713
714 714 COPIED_MASK = int("11", 2)
715 715 COPIED_FROM_P1_FLAG = int("10", 2)
716 716 COPIED_FROM_P2_FLAG = int("11", 2)
717 717
718 718 # structure is <flag><filename-end><copy-source>
719 719 INDEX_HEADER = struct.Struct(">L")
720 720 INDEX_ENTRY = struct.Struct(">bLL")
721 721
722 722
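# Editor's note: illustrative sketch, not part of the original module. It
# shows how the action bits and the copy bits defined above combine into a
# single per-file flag, e.g. for a file that was added and recorded as
# copied from p1.
def _example_flag_layout():
    flag = ADDED_FLAG | COPIED_FROM_P1_FLAG  # 0b00100 | 0b00010 == 6
    assert flag & ACTION_MASK == ADDED_FLAG
    assert flag & COPIED_MASK == COPIED_FROM_P1_FLAG
    return flag

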
723 723 def encode_files_sidedata(files):
724 724 all_files = set(files.touched)
725 725 all_files.update(files.copied_from_p1.values())
726 726 all_files.update(files.copied_from_p2.values())
727 727 all_files = sorted(all_files)
728 728 file_idx = {f: i for (i, f) in enumerate(all_files)}
729 729 file_idx[None] = 0
730 730
731 731 chunks = [INDEX_HEADER.pack(len(all_files))]
732 732
733 733 filename_length = 0
734 734 for f in all_files:
735 735 filename_size = len(f)
736 736 filename_length += filename_size
737 737 flag = 0
738 738 if f in files.added:
739 739 flag |= ADDED_FLAG
740 740 elif f in files.merged:
741 741 flag |= MERGED_FLAG
742 742 elif f in files.removed:
743 743 flag |= REMOVED_FLAG
744 744 elif f in files.salvaged:
745 745 flag |= SALVAGED_FLAG
746 746 elif f in files.touched:
747 747 flag |= TOUCHED_FLAG
748 748
749 749 copy = None
750 750 if f in files.copied_from_p1:
751 751 flag |= COPIED_FROM_P1_FLAG
752 752 copy = files.copied_from_p1.get(f)
753 753 elif f in files.copied_from_p2:
754 754 copy = files.copied_from_p2.get(f)
755 755 flag |= COPIED_FROM_P2_FLAG
756 756 copy_idx = file_idx[copy]
757 757 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
758 758 chunks.extend(all_files)
759 759 return {sidedatamod.SD_FILES: b''.join(chunks)}
760 760
761 761
762 762 def decode_files_sidedata(sidedata):
763 763 md = ChangingFiles()
764 764 raw = sidedata.get(sidedatamod.SD_FILES)
765 765
766 766 if raw is None:
767 767 return md
768 768
769 769 copies = []
770 770 all_files = []
771 771
772 772 assert len(raw) >= INDEX_HEADER.size
773 773 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
774 774
775 775 offset = INDEX_HEADER.size
776 776 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
777 777 file_offset_last = file_offset_base
778 778
779 779 assert len(raw) >= file_offset_base
780 780
781 781 for idx in range(total_files):
782 782 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
783 783 file_end += file_offset_base
784 784 filename = raw[file_offset_last:file_end]
785 785 filesize = file_end - file_offset_last
786 786 assert len(filename) == filesize
787 787 offset += INDEX_ENTRY.size
788 788 file_offset_last = file_end
789 789 all_files.append(filename)
790 790 if flag & ACTION_MASK == ADDED_FLAG:
791 791 md.mark_added(filename)
792 792 elif flag & ACTION_MASK == MERGED_FLAG:
793 793 md.mark_merged(filename)
794 794 elif flag & ACTION_MASK == REMOVED_FLAG:
795 795 md.mark_removed(filename)
796 796 elif flag & ACTION_MASK == SALVAGED_FLAG:
797 797 md.mark_salvaged(filename)
798 798 elif flag & ACTION_MASK == TOUCHED_FLAG:
799 799 md.mark_touched(filename)
800 800
801 801 copied = None
802 802 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
803 803 copied = md.mark_copied_from_p1
804 804 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
805 805 copied = md.mark_copied_from_p2
806 806
807 807 if copied is not None:
808 808 copies.append((copied, filename, copy_idx))
809 809
810 810 for copied, filename, copy_idx in copies:
811 811 copied(all_files[copy_idx], filename)
812 812
813 813 return md
814 814
815 815
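# Editor's note: illustrative sketch, not part of the original module. It
# exercises the encoder/decoder pair above end to end on an invented
# changeset that adds `b.txt` as a copy of `a.txt`, and checks that the
# decoded metadata matches the original (ChangingFiles defines __eq__).
def _example_files_sidedata_roundtrip():
    cf = ChangingFiles()
    cf.mark_added(b'b.txt')
    cf.mark_copied_from_p1(b'a.txt', b'b.txt')
    encoded = encode_files_sidedata(cf)
    decoded = decode_files_sidedata(encoded)
    assert decoded == cf
    return encoded[sidedatamod.SD_FILES]

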
816 816 def _getsidedata(srcrepo, rev):
817 817 ctx = srcrepo[rev]
818 818 files = compute_all_files_changes(ctx)
819 819 return encode_files_sidedata(files), files.has_copies_info
820 820
821 821
822 822 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
823 823 return _getsidedata(repo, rev)[0]
824 824
825 825
826 826 def set_sidedata_spec_for_repo(repo):
827 827 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
828 828 repo.register_wanted_sidedata(sidedatamod.SD_FILES)
829 repo.register_sidedata_computer(
830 revlogconst.KIND_CHANGELOG,
831 sidedatamod.SD_FILES,
832 (sidedatamod.SD_FILES,),
833 copies_sidedata_computer,
834 )
829 repo.register_sidedata_computer(
830 revlogconst.KIND_CHANGELOG,
831 sidedatamod.SD_FILES,
832 (sidedatamod.SD_FILES,),
833 copies_sidedata_computer,
834 )
835 835
836 836
837 837 def getsidedataadder(srcrepo, destrepo):
838 838 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
839 839 if pycompat.iswindows or not use_w:
840 840 return _get_simple_sidedata_adder(srcrepo, destrepo)
841 841 else:
842 842 return _get_worker_sidedata_adder(srcrepo, destrepo)
843 843
844 844
845 845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
846 846 """The function used by workers precomputing sidedata
847 847
848 848 It reads an input queue containing revision numbers.
849 849 It writes to an output queue containing (rev, <sidedata-map>).
850 850
851 851 The `None` input value is used as a stop signal.
852 852
853 853 The `tokens` semaphore is used to avoid having too many unprocessed
854 854 entries. The workers need to acquire one token before fetching a task.
855 855 Tokens are released by the consumer of the produced data.
856 856 """
857 857 tokens.acquire()
858 858 rev = revs_queue.get()
859 859 while rev is not None:
860 860 data = _getsidedata(srcrepo, rev)
861 861 sidedata_queue.put((rev, data))
862 862 tokens.acquire()
863 863 rev = revs_queue.get()
864 864 # processing of `None` is completed, release the token.
865 865 tokens.release()
866 866
867 867
868 868 BUFF_PER_WORKER = 50
869 869
870 870
871 871 def _get_worker_sidedata_adder(srcrepo, destrepo):
872 872 """The parallel version of the sidedata computation
873 873
874 874 This code spawns a pool of workers that precompute a buffer of sidedata
875 875 before we actually need it"""
876 876 # avoid circular import copies -> scmutil -> worker -> copies
877 877 from . import worker
878 878
879 879 nbworkers = worker._numworkers(srcrepo.ui)
880 880
881 881 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
882 882 revsq = multiprocessing.Queue()
883 883 sidedataq = multiprocessing.Queue()
884 884
885 885 assert srcrepo.filtername is None
886 886 # queue all tasks beforehand, revision numbers are small and it makes
887 887 # synchronisation simpler
888 888 #
889 889 # Since the computation for each node can be quite expensive, the overhead
890 890 # of using a single queue is not relevant. In practice, most computations
891 891 # are fast but some are very expensive and dominate all the other smaller
892 892 # costs.
893 893 for r in srcrepo.changelog.revs():
894 894 revsq.put(r)
895 895 # queue the "no more tasks" markers
896 896 for i in range(nbworkers):
897 897 revsq.put(None)
898 898
899 899 allworkers = []
900 900 for i in range(nbworkers):
901 901 args = (srcrepo, revsq, sidedataq, tokens)
902 902 w = multiprocessing.Process(target=_sidedata_worker, args=args)
903 903 allworkers.append(w)
904 904 w.start()
905 905
906 906 # dictionary to store results for revisions higher than the one we are
907 907 # looking for. For example, if we need the sidedatamap for 42, and 43 is
908 908 # received, we shelve 43 for later use.
909 909 staging = {}
910 910
911 911 def sidedata_companion(revlog, rev):
912 912 data = {}, False
913 913 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
914 914 # Is the data previously shelved ?
915 915 data = staging.pop(rev, None)
916 916 if data is None:
917 917 # look at the queued results until we find the one we are looking
918 918 # for (shelve the other ones)
919 919 r, data = sidedataq.get()
920 920 while r != rev:
921 921 staging[r] = data
922 922 r, data = sidedataq.get()
923 923 tokens.release()
924 924 sidedata, has_copies_info = data
925 925 new_flag = 0
926 926 if has_copies_info:
927 927 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
928 928 return False, (), sidedata, new_flag, 0
929 929
930 930 return sidedata_companion
931 931
932 932
933 933 def _get_simple_sidedata_adder(srcrepo, destrepo):
934 934 """The simple version of the sidedata computation
935 935
936 936 It just computes it in the same thread on request"""
937 937
938 938 def sidedatacompanion(revlog, rev):
939 939 sidedata, has_copies_info = {}, False
940 940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
941 941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
942 942 new_flag = 0
943 943 if has_copies_info:
944 944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
945 945
946 946 return False, (), sidedata, new_flag, 0
947 947
948 948 return sidedatacompanion
949 949
950 950
951 951 def getsidedataremover(srcrepo, destrepo):
952 952 def sidedatacompanion(revlog, rev):
953 953 f = ()
954 954 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
955 955 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
956 956 f = (
957 957 sidedatamod.SD_P1COPIES,
958 958 sidedatamod.SD_P2COPIES,
959 959 sidedatamod.SD_FILESADDED,
960 960 sidedatamod.SD_FILESREMOVED,
961 961 )
962 962 return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
963 963
964 964 return sidedatacompanion