##// END OF EJS Templates
sidedata: return enough data to set the proper flag in the future...
marmoute -
r46326:53c265a6 default
parent child Browse files
Show More
@@ -1,928 +1,929
1 1 # coding: utf8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from . import (
15 15 error,
16 16 node,
17 17 pycompat,
18 18 util,
19 19 )
20 20
21 21 from .revlogutils import (
22 22 flagutil as sidedataflag,
23 23 sidedata as sidedatamod,
24 24 )
25 25
26 26
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added:    files actively added in the changeset.
    - merged:   files whose history got merged
    - removed:  files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched:  files affected by the changeset (includes all of the above)

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.

    Each public attribute is a cached, read-only snapshot (frozenset or dict
    copy) of a private mutable container. The `mark_*` / `update_*` methods
    mutate the private container and drop the cached snapshot so that it is
    rebuilt on next access.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        # Every specific action implies the file is touched.
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        # `mark_salvaged` marks the file as touched too; keep the constructor
        # consistent with it, otherwise a salvaged file handed to the
        # constructor would be missing from `touched`.
        self._touched.update(self._salvaged)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def has_copies_info(self):
        """True if any of removed/merged/salvaged/copies is non-empty"""
        return bool(
            self.removed
            or self.merged
            or self.salvaged
            or self.copied_from_p1
            or self.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        """record `filename` as added (and touched)"""
        # drop the cached frozenset so that it gets recomputed
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        """record every entry of `filenames` as added"""
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed
        merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        """record `filename` as merged (and touched)"""
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        """record every entry of `filenames` as merged"""
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |       one         |     none      ||   yes
         (c) |       one         | same filenode ||   no
         (d) |       one         |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        """record `filename` as removed (and touched)"""
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        """record every entry of `filenames` as removed"""
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        """record `filename` as salvaged (and touched)"""
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        """record every entry of `filenames` as salvaged"""
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        """record `filename` as touched"""
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        """record every entry of `filenames` as touched"""
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        """{"<new-name>": "<source-name-in-p1>"} mapping (a copy)"""
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        """record that `dest` was copied from `source` in p1"""
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        """record every `{dest: source}` entry of `copies` as a p1 copy"""
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        """{"<new-name>": "<source-name-in-p2>"} mapping (a copy)"""
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        """record that `dest` was copied from `source` in p2"""
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        """record every `{dest: source}` entry of `copies` as a p2 copy"""
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
236 236
237 237
def compute_all_files_changes(ctx):
    """compute the files changed by a revision

    Dispatch to the root / linear / merge processing depending on which of
    the two parents of `ctx` are null. Returns a `ChangingFiles` object.
    """
    p1 = ctx.p1()
    p2 = ctx.p2()
    p1_is_null = p1.rev() == node.nullrev
    p2_is_null = p2.rev() == node.nullrev

    if p1_is_null and p2_is_null:
        return _process_root(ctx)
    if p2_is_null:
        # the regular single parent case
        return _process_linear(p1, ctx)
    if p1_is_null:
        # In the wild, one can encounter changeset where p1 is null but p2 is
        # not.
        # NOTE(review): the (null) p1 is what gets diffed here while copies
        # are recorded against p2 -- confirm p2 was not intended.
        return _process_linear(p1, ctx, parent=2)
    if p1.rev() == p2.rev():
        # In the wild, one can encounter such "non-merge"
        return _process_linear(p1, ctx)
    return _process_merge(p1, p2, ctx)
254 254
255 255
def _process_root(ctx):
    """compute the appropriate changed files for a changeset with no parents

    There was nothing before it, so every file of the manifest is added.
    """
    changes = ChangingFiles()
    changes.update_added(ctx.manifest())
    return changes
265 265
266 266
def _process_linear(parent_ctx, children_ctx, parent=1):
    """compute the appropriate changed files for a changeset with a single parent

    `parent` (1 or 2) selects whether copies are recorded as coming from p1
    or from p2.
    """
    changes = ChangingFiles()
    manifest_diff = parent_ctx.manifest().diff(children_ctx.manifest())

    # files still present in the child, they might carry copy information
    still_present = []

    for filename, entry in manifest_diff.items():
        old_node = entry[0][0]
        new_node = entry[1][0]
        if new_node is None:
            # no filenode for the "new" value, file is absent
            changes.mark_removed(filename)
            continue
        still_present.append(filename)
        if old_node is None:
            # no filenode for the "old" value, file was absent
            changes.mark_added(filename)
        else:
            # filenode for both "old" and "new"
            changes.mark_touched(filename)

    if parent == 1:
        record_copy = changes.mark_copied_from_p1
    elif parent == 2:
        record_copy = changes.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    for filename in still_present:
        rename = children_ctx[filename].renamed()
        if not rename:
            continue
        source, srcnode = rename
        record_copy(source, filename)

    return changes
303 303
304 304
def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    This is a more advanced case. The information we need to record is
    summarised in the following table:

    ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
    │ diff ╲  diff │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │
    │  p2   ╲  p1  │              │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │🄱  No Changes │🄳  No Changes │              │
    │  ø           │🄰  No Changes │      OR      │     OR       │🄵  No Changes │
    │              │              │🄲  Deleted[1] │🄴  Salvaged[2]│     [3]      │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄶  No Changes │              │              │              │
    │ (Some, None) │      OR      │🄻  Deleted    │       ø      │      ø       │
    │              │🄷  Deleted[1] │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄸  No Changes │              │              │              │
    │ (None, Some) │     OR       │      ø       │🄼   Added     │🄽  Merged     │
    │              │🄹  Salvaged[2]│              │   (copied?)  │   (copied?)  │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │              │              │              │
    │ (Some, Some) │🄺  No Changes │      ø       │🄾  Merged     │🄿  Merged     │
    │              │     [3]      │              │   (copied?)  │   (copied?)  │
    └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

    Special case [1]:

      The situation is:
        - parent-A:     file exists,
        - parent-B:     no file,
        - working-copy: no file.

      Detecting a "deletion" will depend on the presence of actual change on
      the "parent-A" branch:

      Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
      compared to the merge ancestors, then parent-A branch left the file
      untouched while parent-B deleted it. We simply apply the change from
      "parent-B" branch: the file was automatically dropped.
      The result is:
          - file is not recorded as touched by the merge.

      Subcase 🄲 or 🄷 : otherwise, the changes from parent-A branch were
      explicitly dropped and the file was "deleted again". From a user
      perspective, the message about "locally changed" while "remotely
      deleted" (or the other way around) was issued and the user chose to
      delete the file.
      The result:
          - file is recorded as touched by the merge.


    Special case [2]:

      The situation is:
        - parent-A:     no file,
        - parent-B:     file,
        - working-copy: file (same content as parent-B).

      There are three subcases depending on the ancestors contents:

      - A) the file is missing in all ancestors,
      - B) at least one ancestor has the file with filenode ≠ from parent-B,
      - C) all ancestors use the same filenode as parent-B,

      Subcase (A) is the simpler one, nothing happened on parent-A side while
      parent-B added it.

        The result:
            - the file is not marked as touched by the merge.

      Subcase (B) is the counterpart of "Special case [1]", the file was
      modified on parent-B side, while parent-A side deleted it. However this
      time, the conflict was solved by keeping the file (and its
      modification). We consider the file as "salvaged".

        The result:
            - the file is marked as "salvaged" by the merge.

      Subcase (C) is a subtle variation of the case above. In this case, the
      file is unchanged on the parent-B side and actively removed on the
      parent-A side. So the merge machinery correctly decides it should be
      removed. However, the file was explicitly restored to its parent-B
      content before the merge was committed. The file is marked as salvaged
      too. From the merge result perspective, this is similar to Subcase (B),
      however from the merge resolution perspective they differ since in (C)
      the merge had an obvious resolution (removing the file) that was then
      reversed.

    Special case [3]:

      The situation is:
        - parent-A:     file,
        - parent-B:     file (different filenode as parent-A),
        - working-copy: file (same filenode as parent-B).

      This case is in theory much simpler: for this to happen, the filenode
      in parent-A is purely replacing the one in parent-B (either a
      descendant, or a full new file history, see changeset). So the merge
      introduces no changes, and the file is not affected by the merge...

      However, in the wild it is possible to find commits where the above is
      not true. For example repositories have some commits where the *new*
      node is an ancestor of the node in parent-A, or where parent-A and
      parent-B are two branches of the same file history, yet no
      merge-filenode was created (while the "merge" should have led to a
      "modification").

      Detecting such cases (and not recording the file as modified) would be a
      nice bonus. However we do not do any of this yet.
    """

    changes = ChangingFiles()

    merge_manifest = ctx.manifest()
    diff_p1 = p1_ctx.manifest().diff(merge_manifest)
    diff_p2 = p2_ctx.manifest().diff(merge_manifest)

    repo = ctx.repo()
    cahs = repo.changelog.commonancestorsheads(p1_ctx.node(), p2_ctx.node())
    if not cahs:
        cahs = [node.nullrev]
    # manifests of all the common ancestors heads
    mas = [repo[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case 🄰 happens automatically. Since there is no entry in
    # d1 nor d2, we won't iterate on it ever.

    # Iteration over d1 content will deal with all cases, but the ones in the
    # first column of the table.
    for filename, d1 in diff_p1.items():
        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(changes, mas, filename, d1)
            continue

        p1_old, p1_new = d1[0][0], d1[1][0]
        p2_old, p2_new = d2[0][0], d2[1][0]

        if p1_old is None and p2_old is None:
            # case 🄼 — both added the file.
            changes.mark_added(filename)
            copy_candidates.append(filename)
        elif p1_new is None and p2_new is None:
            # case 🄻 — both deleted the file.
            changes.mark_removed(filename)
        elif p1_new is not None and p2_new is not None:
            # case 🄽 🄾 🄿
            changes.mark_merged(filename)
            copy_candidates.append(filename)
        else:
            # Impossible case, the post-merge file status cannot be None on
            # one side and Something on the other side.
            assert False, "unreachable"

    # Iteration over remaining d2 content deals with the first column of the
    # table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(changes, mas, filename, d2)

    for filename in copy_candidates:
        rename = ctx[filename].renamed()
        if not rename:
            continue
        source, srcnode = rename
        # attribute the copy to the first parent holding that exact filenode
        if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
            changes.mark_copied_from_p1(source, filename)
        elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
            changes.mark_copied_from_p2(source, filename)
    return changes
477 477
478 478
479 479 def _find(manifest, filename):
480 480 """return the associate filenode or None"""
481 481 if filename not in manifest:
482 482 return None
483 483 return manifest.find(filename)[0]
484 484
485 485
def _process_other_unchanged(md, mas, filename, diff):
    """record the change for a file that differs from one parent only

    `md` is the `ChangingFiles` being filled, `mas` the manifests of the
    merge ancestors and `diff` the `((old-node, _), (new-node, _))` entry for
    `filename` against the parent that differs from the merge result.
    """
    source_node = diff[0][0]
    target_node = diff[1][0]
    in_parent = source_node is not None
    in_merge = target_node is not None

    if in_parent and not in_merge:
        if any(_find(ma, filename) != source_node for ma in mas):
            # case 🄲 or 🄷
            md.mark_removed(filename)
        # else, we have case 🄱 or 🄶 : no change needs to be recorded
    elif in_merge and not in_parent:
        if any(_find(ma, filename) is not None for ma in mas):
            # case 🄴 or 🄹
            md.mark_salvaged(filename)
        # else, we have case 🄳 or 🄸 : simple merge without intervention
    elif in_parent and in_merge:
        # case 🄵 or 🄺 : simple merge without intervention
        #
        # In the buggy case where source_node is not an ancestor of
        # target_node, a new filenode should have been created, recording
        # this as "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case, the diff algorithm should not return an entry
        # if the file is missing on both sides.
        assert False, "unreachable"
511 511
512 512
513 513 def _missing_from_all_ancestors(mas, filename):
514 514 return all(_find(ma, filename) is None for ma in mas)
515 515
516 516
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file listed by `ctx.files()` counts as added when it is absent from
    every parent of `ctx`.
    """
    return [
        f for f in ctx.files() if all(f not in p for p in ctx.parents())
    ]
525 525
526 526
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.

    `x`, when provided, is a `(p1, p2, m1, m2)` tuple of already computed
    parent contexts and parent manifests.
    """

    if x is None:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()
    else:
        p1, p2, m1, m2 = x

    @util.cachefunc
    def mas():
        # manifests of the common ancestors heads, computed on first use only
        repo = ctx.repo()
        cahs = repo.changelog.commonancestorsheads(p1.node(), p2.node())
        if not cahs:
            cahs = [node.nullrev]
        return [repo[r].manifest() for r in cahs]

    def deletionfromparent(f):
        in_m1 = f in m1
        in_m2 = f in m2
        if not in_m1 and not in_m2:
            # neither parent has the file
            return True
        if in_m1 and in_m2:
            # both parents have the file
            return False
        # exactly one parent has the file: it must match every ancestor
        holder = m1 if in_m1 else m2
        return all(f in ma and ma.find(f) == holder.find(f) for ma in mas())

    return deletionfromparent
580 580
581 581
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    Candidates are the files listed by `ctx.files()` that are absent from
    `ctx`; those for which the removal filter answers True are dropped.
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    is_deletion_from_parent = get_removal_filter(ctx)
    return [f for f in candidates if not is_deletion_from_parent(f)]
593 593
594 594
def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset

    Always empty when `ctx` has fewer than two parents. Otherwise a file is
    reported when the second parent of its filenode is not the null id.
    """
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f not in ctx:
            continue
        fctx = ctx[f]
        second_parent = fctx._filelog.parents(fctx._filenode)[1]
        if second_parent != node.nullid:
            merged.append(f)
    return merged
608 608
609 609
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is in the form: `{newname: oldname}`
    """
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    p1copies = {}
    p2copies = {}
    for dst in ctx.files():
        if not (narrowmatch(dst) and dst in ctx):
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # p1 is checked first: a copy is attributed to at most one parent
        for parent, copies in ((p1, p1copies), (p2, p2copies)):
            if src in parent and parent[src].filenode() == srcnode:
                copies[dst] = src
                break
    return p1copies, p2copies
634 634
635 635
def encodecopies(files, copies):
    """encode a `{dest: source}` mapping relative to the `files` list

    Each copy becomes a `<index-of-dest-in-files> NUL <source>` record and
    records are joined with newlines. Raises ProgrammingError when some
    destination of `copies` is not part of `files`.
    """
    items = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
646 646
647 647
def decodecopies(files, data):
    """decode copies `data` produced by `encodecopies` for the `files` list

    Returns a `{dest: source}` dictionary (empty when `data` is empty), or
    None when `data` cannot be interpreted: malformed record, non-integer
    index, or index outside of `files`.
    """
    copies = {}
    if not data:
        return copies
    nb_files = len(files)
    try:
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            # Reject out of range values explicitly (as `decodefileindices`
            # does): a negative index would otherwise silently wrap around
            # and pick an unrelated file.
            if i < 0 or i >= nb_files:
                return None
            copies[files[i]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
663 663
664 664
def encodefileindices(files, subset):
    """encode `subset` as newline separated indices into the `files` list

    Entries of `subset` that are not in `files` are silently ignored.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in wanted
    )
672 672
673 673
def decodefileindices(files, data):
    """decode `data` produced by `encodefileindices` for the `files` list

    Returns the list of selected files (empty when `data` is empty), or None
    when an index is not an integer or is outside of `files`.
    """
    subset = []
    if not data:
        return subset
    nb_files = len(files)
    try:
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if not 0 <= i < nb_files:
                return None
            subset.append(files[i])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
689 689
690 690
691 691 # see mercurial/helptext/internals/revlogs.txt for details about the format
692 692
# Flag byte layout: three "action" bits followed by two "copy" bits.
ACTION_MASK = 0b111 << 2
# note: an untouched file used as copy source will have `000` for this mask.
ADDED_FLAG = 0b001 << 2
MERGED_FLAG = 0b010 << 2
REMOVED_FLAG = 0b011 << 2
SALVAGED_FLAG = 0b100 << 2
TOUCHED_FLAG = 0b101 << 2

COPIED_MASK = 0b11
COPIED_FROM_P1_FLAG = 0b10
COPIED_FROM_P2_FLAG = 0b11

# header is the number of entries, as a big-endian unsigned 32 bits integer
INDEX_HEADER = struct.Struct(">L")
# structure is <flag><filename-end><copy-source>
INDEX_ENTRY = struct.Struct(">bLL")
708 708
709 709
def encode_files_sidedata(files):
    """encode a `ChangingFiles` object into a `{SD_FILES: <bytes>}` sidedata map

    The payload is: a header holding the number of entries, one index entry
    per file (flag, offset of the end of the filename, index of the copy
    source), then all the filenames concatenated in sorted order. The entries
    cover the touched files plus the copy sources.
    """
    touched = files.touched
    p1_copies = files.copied_from_p1
    p2_copies = files.copied_from_p2

    names = set(touched)
    names.update(p1_copies.values())
    names.update(p2_copies.values())
    names = sorted(names)

    index_of = dict((name, pos) for pos, name in enumerate(names))
    # entries without copy source store 0, the copy bits of the flag tell
    # whether that index is meaningful
    index_of[None] = 0

    # first matching set wins, in that order
    actions = (
        (files.added, ADDED_FLAG),
        (files.merged, MERGED_FLAG),
        (files.removed, REMOVED_FLAG),
        (files.salvaged, SALVAGED_FLAG),
        (touched, TOUCHED_FLAG),
    )

    chunks = [INDEX_HEADER.pack(len(names))]
    name_end = 0
    for name in names:
        # offsets are cumulative: each entry stores where its filename ends
        name_end += len(name)

        flag = 0
        for members, action_flag in actions:
            if name in members:
                flag = action_flag
                break

        source = None
        if name in p1_copies:
            flag |= COPIED_FROM_P1_FLAG
            source = p1_copies[name]
        elif name in p2_copies:
            flag |= COPIED_FROM_P2_FLAG
            source = p2_copies[name]

        chunks.append(INDEX_ENTRY.pack(flag, name_end, index_of[source]))
    chunks.extend(names)
    return {sidedatamod.SD_FILES: b''.join(chunks)}
747 747
748 748
def decode_files_sidedata(sidedata):
    """decode the `SD_FILES` entry of a sidedata map into a `ChangingFiles`

    An empty `ChangingFiles` is returned when the entry is missing.
    """
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)
    if raw is None:
        return md

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    # filenames are stored right after the index
    names_start = INDEX_HEADER.size + INDEX_ENTRY.size * total_files
    assert len(raw) >= names_start

    mark_action = {
        ADDED_FLAG: md.mark_added,
        MERGED_FLAG: md.mark_merged,
        REMOVED_FLAG: md.mark_removed,
        SALVAGED_FLAG: md.mark_salvaged,
        TOUCHED_FLAG: md.mark_touched,
    }
    mark_copy = {
        COPIED_FROM_P1_FLAG: md.mark_copied_from_p1,
        COPIED_FROM_P2_FLAG: md.mark_copied_from_p2,
    }

    all_files = []
    pending_copies = []
    name_start = names_start
    for idx in range(total_files):
        entry_offset = INDEX_HEADER.size + idx * INDEX_ENTRY.size
        flag, name_end, copy_idx = INDEX_ENTRY.unpack_from(raw, entry_offset)
        # stored offsets are relative to the start of the filenames area
        name_end += names_start
        filename = raw[name_start:name_end]
        assert len(filename) == name_end - name_start
        name_start = name_end
        all_files.append(filename)

        action = mark_action.get(flag & ACTION_MASK)
        if action is not None:
            action(filename)

        copied = mark_copy.get(flag & COPIED_MASK)
        if copied is not None:
            pending_copies.append((copied, filename, copy_idx))

    # copy sources can only be resolved once all filenames are known
    for copied, filename, copy_idx in pending_copies:
        copied(all_files[copy_idx], filename)

    return md
801 801
802 802
def _getsidedata(srcrepo, rev):
    """compute the files sidedata of revision `rev` of `srcrepo`

    Returns a `(sidedata, has_copies_info)` pair: the encoded sidedata map
    and the `has_copies_info` value of the computed file changes.
    """
    ctx = srcrepo[rev]
    files = compute_all_files_changes(ctx)
    return encode_files_sidedata(files), files.has_copies_info
807 807
808 808
def getsidedataadder(srcrepo, destrepo):
    """return the sidedata companion function used to add files sidedata

    The worker based version is only used when the
    `experimental.worker.repository-upgrade` config is set and we are not
    running on Windows.
    """
    workers_enabled = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if workers_enabled and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
815 815
816 816
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads revision numbers from an input queue.
    It writes `(rev, <result of _getsidedata>)` pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. The workers need to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    while True:
        tokens.acquire()
        rev = revs_queue.get()
        if rev is None:
            break
        sidedata_queue.put((rev, _getsidedata(srcrepo, rev)))
    # processing of `None` is completed, release the token.
    tokens.release()
838 838
839 839
# number of precomputed results each worker may have pending (size of the
# token semaphore is `nbworkers * BUFF_PER_WORKER`)
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them.

    Returns a `sidedata_companion(revlog, rev)` function returning the
    `(False, (), <sidedata-map>)` triple for that revision.
    """
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        # `data` is always a (sidedata, has_copies_info) pair
        data = {}, False
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            data = staging.pop(rev, None)
            if data is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, data = sidedataq.get()
                while r != rev:
                    staging[r] = data
                    r, data = sidedataq.get()
            # one result consumed, let a worker compute another one
            tokens.release()
        # Do not rebind `sidedataq` here: assigning to it would make it a
        # local of this closure and break the `sidedataq.get()` calls above.
        sidedata, has_copies_info = data
        return False, (), sidedata

    return sidedata_companion
899 900
900 901
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request.

    Returns a `sidedatacompanion(revlog, rev)` function returning the
    `(False, (), <sidedata-map>)` triple for that revision.
    """

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # `_getsidedata` returns a pair, only the map is used for now
            sidedata, has_copies_info = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
913 914
914 915
def getsidedataremover(srcrepo, destrepo):
    """return the sidedata companion function used to drop copies sidedata

    The companion returns `(False, <keys-to-remove>, {})`; keys are only
    listed for changelog revisions flagged as carrying sidedata.
    """
    # NOTE(review): SD_FILES (the key written by `encode_files_sidedata`) is
    # not part of this list -- confirm whether it should be removed too.
    removed_keys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        if is_changelog and revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
            return False, removed_keys, {}
        return False, (), {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now