##// END OF EJS Templates
sidedata: register copies sidedata computer regardless of the revlog version...
Raphaël Gomès -
r47840:b409cdc6 default
parent child Browse files
Show More
@@ -1,964 +1,964 b''
1 # coding: utf-8
1 # coding: utf-8
2 # metadata.py -- code related to various metadata computation and access.
2 # metadata.py -- code related to various metadata computation and access.
3 #
3 #
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9 from __future__ import absolute_import, print_function
9 from __future__ import absolute_import, print_function
10
10
11 import multiprocessing
11 import multiprocessing
12 import struct
12 import struct
13
13
14 from .node import nullrev
14 from .node import nullrev
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements as requirementsmod,
18 requirements as requirementsmod,
19 util,
19 util,
20 )
20 )
21
21
22 from .revlogutils import (
22 from .revlogutils import (
23 constants as revlogconst,
23 constants as revlogconst,
24 flagutil as sidedataflag,
24 flagutil as sidedataflag,
25 sidedata as sidedatamod,
25 sidedata as sidedatamod,
26 )
26 )
27
27
28
28
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into five sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the merge

    and copies information is held by two mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        # each category defaults to an empty set/dict when not provided
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        # by definition, any added/merged/removed file is also "touched"
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        # compare through the public (frozen) views rather than the
        # private mutable containers
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def has_copies_info(self):
        """True when any information beyond the plain "touched" set exists"""
        return bool(
            self.removed
            or self.merged
            or self.salvaged
            or self.copied_from_p1
            or self.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        # invalidate the cached frozenset (util.propertycache stores it in
        # the instance __dict__) before mutating the underlying set
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        """mark multiple files as added"""
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        """mark multiple files as merged"""
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||  yes
         (b) | one               |     none      ||  yes
         (c) | one               | same filenode ||  no
         (d) | one               |  new filenode ||  yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        """mark multiple files as removed"""
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exists.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        """mark multiple files as salvaged"""
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        """mark multiple files as touched"""
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        # return a copy so callers cannot mutate internal state
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        # invalidate the propertycache-stored copy before mutating
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        """record multiple {dest: source} p1-copy entries"""
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        # return a copy so callers cannot mutate internal state
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        # invalidate the propertycache-stored copy before mutating
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        """record multiple {dest: source} p2-copy entries"""
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
238
238
239
239
def compute_all_files_changes(ctx):
    """Compute the files changed by a revision.

    Dispatches to the root / linear / merge handler depending on which
    parents of ``ctx`` are null.
    """
    parent_one = ctx.p1()
    parent_two = ctx.p2()
    p1_is_null = parent_one.rev() == nullrev
    p2_is_null = parent_two.rev() == nullrev

    if p1_is_null and p2_is_null:
        # no parents at all: a root changeset
        return _process_root(ctx)
    if p2_is_null:
        # the common case: a single (first) parent
        return _process_linear(parent_one, ctx)
    if p1_is_null:
        # In the wild, one can encounter changesets where p1 is null but p2 is not
        return _process_linear(parent_one, ctx, parent=2)
    if parent_one.rev() == parent_two.rev():
        # In the wild, one can encounter such "non-merge"
        return _process_linear(parent_one, ctx)
    return _process_merge(parent_one, parent_two, ctx)
256
256
257
257
def _process_root(ctx):
    """Compute the appropriate changed files for a changeset with no parents."""
    # Nothing existed before this changeset, so every file in its manifest
    # counts as added.
    md = ChangingFiles()
    md.update_added(ctx.manifest())
    return md
266
266
267
267
def _process_linear(parent_ctx, children_ctx, parent=1):
    """Compute the appropriate changed files for a changeset with a single parent.

    ``parent`` selects which copy-source mapping (p1 or p2) rename
    information is recorded into.
    """
    md = ChangingFiles()
    manifest_diff = parent_ctx.manifest().diff(children_ctx.manifest())

    copy_candidates = []

    for fname, delta in manifest_diff.items():
        if delta[1][0] is None:
            # no filenode on the "new" side: the file is gone in the child
            md.mark_removed(fname)
            continue
        copy_candidates.append(fname)
        if delta[0][0] is None:
            # no filenode on the "old" side: the file was absent in the parent
            md.mark_added(fname)
        else:
            # a filenode on both sides: the file content changed
            md.mark_touched(fname)

    # pick which parent the copy information gets attributed to
    if parent == 1:
        record_copy = md.mark_copied_from_p1
    elif parent == 2:
        record_copy = md.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    for fname in copy_candidates:
        rename_info = children_ctx[fname].renamed()
        if rename_info:
            # only the source name matters here; the source node is unused
            record_copy(rename_info[0], fname)

    return md
303
303
304
304
def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    This is a more advanced case. The information we need to record is
    summarised in the following table:

    ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
    │ diff ╲  diff │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │
    │  p2     ╲ p1 │              │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │🄱  No Changes │🄳 No Changes  │              │
    │  ø           │🄰  No Changes │      OR      │     OR       │🄵  No Changes │
    │              │              │🄲  Deleted[1] │🄴 Salvaged[2] │       [3]    │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄶 No Changes  │              │              │              │
    │ (Some, None) │      OR      │🄻  Deleted    │       ø      │      ø       │
    │              │🄷 Deleted[1]  │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄸 No Changes  │              │              │   🄽 Touched  │
    │ (None, Some) │     OR       │      ø       │🄼  Added      │OR 🅀 Salvaged │
    │              │🄹 Salvaged[2] │              │   (copied?)  │   (copied?)  │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │              │   🄾 Touched  │   🄿 Merged   │
    │ (Some, Some) │🄺 No Changes  │      ø       │OR 🅁 Salvaged │OR 🅂 Touched  │
    │              │      [3]     │              │   (copied?)  │   (copied?)  │
    └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

    Special case [1]:

      The situation is:
        - parent-A:     file exists,
        - parent-B:     no file,
        - working-copy: no file.

      Detecting a "deletion" will depend on the presence of actual change on
      the "parent-A" branch:

      Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
      compared to the merge ancestors, then parent-A branch left the file
      untouched while parent-B deleted it. We simply apply the change from
      "parent-B" branch the file was automatically dropped.
      The result is:
          - file is not recorded as touched by the merge.

      Subcase 🄲 or 🄷 : otherwise, the change from parent-A branch were explicitly dropped and
      the file was "deleted again". From a user perspective, the message
      about "locally changed" while "remotely deleted" (or the other way
      around) was issued and the user chose to deleted the file.
      The result:
          - file is recorded as touched by the merge.


    Special case [2]:

      The situation is:
        - parent-A:     no file,
        - parent-B:     file,
        - working-copy: file (same content as parent-B).

      There are three subcases depending on the ancestors contents:

      - A) the file is missing in all ancestors,
      - B) at least one ancestor has the file with filenode ≠ from parent-B,
      - C) all ancestors use the same filenode as parent-B,

      Subcase (A) is the simplest, nothing happened on parent-A side while
      parent-B added it.

      The result:
          - the file is not marked as touched by the merge.

      Subcase (B) is the counterpart of "Special case [1]", the file was
      modified on parent-B side, while parent-A side deleted it. However this
      time, the conflict was solved by keeping the file (and its
      modification). We consider the file as "salvaged".

      The result:
          - the file is marked as "salvaged" by the merge.

      Subcase (C) is a subtle variation of the case above. In this case, the
      file is unchanged on the parent-B side and actively removed on the
      parent-A side. So the merge machinery correctly decided it should be
      removed. However, the file was explicitly restored to its parent-B
      content before the merge was committed. The file is marked
      as salvaged too. From the merge result perspective, this is similar to
      Subcase (B), however from the merge resolution perspective they differ
      since in (C), there was some conflict not obvious solution to the
      merge (That got reversed)

    Special case [3]:

      The situation is:
        - parent-A:     file,
        - parent-B:     file (different filenode as parent-A),
        - working-copy: file (same filenode as parent-B).

      This case is in theory much simpler, for this to happen, this means the
      filenode in parent-A is purely replacing the one in parent-B (either a
      descendant, or a full new file history, see changeset). So the merge
      introduces no changes, and the file is not affected by the merge...

      However, in the wild it is possible to find commits where the above is not
      true. For example repositories have some commits where the *new* node is an
      ancestor of the node in parent-A, or where parent-A and parent-B are two
      branches of the same file history, yet no merge-filenode was created
      (while the "merge" should have led to a "modification").

      Detecting such cases (and not recording the file as modified) would be a
      nice bonus. However we do not do any of this yet.
    """

    repo = ctx.repo()
    md = ChangingFiles()

    m = ctx.manifest()
    p1m = p1_ctx.manifest()
    p2m = p2_ctx.manifest()
    diff_p1 = p1m.diff(m)
    diff_p2 = p2m.diff(m)

    # common ancestors heads are used to decide whether a deletion or
    # addition was "active" or merely propagated from one side
    cahs = ctx.repo().changelog.commonancestorsheads(
        p1_ctx.node(), p2_ctx.node()
    )
    if not cahs:
        cahs = [nullrev]
    mas = [ctx.repo()[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case 🄰 happens automatically. Since there is no entry in
    # d1 nor d2, we won't iterate on it ever.

    # Iteration over d1 content will deal with all cases, but the one in the
    # first column of the table.
    for filename, d1 in diff_p1.items():

        # pop so the second loop only sees files untouched on the p1 side
        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(md, mas, filename, d1)
        else:

            if d1[0][0] is None and d2[0][0] is None:
                # case 🄼 — the file is absent in both parents: it was added.
                md.mark_added(filename)
                copy_candidates.append(filename)
            elif d1[1][0] is None and d2[1][0] is None:
                # case 🄻 — both sides deleted the file.
                md.mark_removed(filename)
            elif d1[1][0] is not None and d2[1][0] is not None:
                if d1[0][0] is None or d2[0][0] is None:
                    if any(_find(ma, filename) is not None for ma in mas):
                        # case 🅀 or 🅁
                        md.mark_salvaged(filename)
                    else:
                        # case 🄽 🄾 : touched
                        md.mark_touched(filename)
                else:
                    fctx = repo.filectx(filename, fileid=d1[1][0])
                    if fctx.p2().rev() == nullrev:
                        # case 🅂
                        # lets assume we can trust the file history. If the
                        # filenode is not a merge, the file was not merged.
                        md.mark_touched(filename)
                    else:
                        # case 🄿
                        md.mark_merged(filename)
                        copy_candidates.append(filename)
            else:
                # Impossible case, the post-merge file status cannot be None on
                # one side and Something on the other side.
                assert False, "unreachable"

    # Iteration over remaining d2 content deal with the first column of the
    # table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(md, mas, filename, d2)

    for filename in copy_candidates:
        copy_info = ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            # attribute the copy to whichever parent actually holds the
            # source file with the matching filenode
            if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p1(source, filename)
            elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p2(source, filename)
    return md
493
493
494
494
495 def _find(manifest, filename):
495 def _find(manifest, filename):
496 """return the associate filenode or None"""
496 """return the associate filenode or None"""
497 if filename not in manifest:
497 if filename not in manifest:
498 return None
498 return None
499 return manifest.find(filename)[0]
499 return manifest.find(filename)[0]
500
500
501
501
def _process_other_unchanged(md, mas, filename, diff):
    """Record the change for a file modified on only one side of a merge.

    ``md`` is the ChangingFiles object gathering the results, ``mas`` the
    manifests of the merge ancestors, and ``diff`` the
    ``((old-node, old-flag), (new-node, new-flag))`` manifest-diff entry for
    ``filename`` on the side that changed it.
    """
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        # the file was deleted on the changed side
        if any(_find(ma, filename) != source_node for ma in mas):
            # case 🄲 or 🄷
            md.mark_removed(filename)
        # else, we have case 🄱 or 🄶 : no change need to be recorded
    elif source_node is None and target_node is not None:
        # the file was (re-)added on the changed side
        if any(_find(ma, filename) is not None for ma in mas):
            # case 🄴 or 🄹
            md.mark_salvaged(filename)
        # else, we have case 🄳 or 🄸 : simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case 🄵 or 🄺 : simple merge without intervention
        #
        # In buggy case where source_node is not an ancestors of target_node.
        # There should have a been a new filenode created, recording this as
        # "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case, the diff algorithm should not return entry if the
        # file is missing on both side.
        assert False, "unreachable"
526 assert False, "unreachable"
527
527
528
528
def _missing_from_all_ancestors(mas, filename):
    """True when ``filename`` appears in none of the ancestor manifests."""
    for ancestor_manifest in mas:
        if _find(ancestor_manifest, filename) is not None:
            return False
    return True
531
531
532
532
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    # a file is "added" when no parent of the changeset contains it
    return [
        f
        for f in ctx.files()
        if not any(f in parent for parent in ctx.parents())
    ]
540
540
541
541
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        # the caller already computed parents and manifests
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        # manifests of the merge ancestors; lazy since many callers never
        # need them, and cached so they are computed at most once
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            # no common ancestor: fall back to the null revision
            cahs = [nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        # Return True when the absence of `f` comes from a parent (i.e. the
        # merge commit itself did not delete it), per the rules documented
        # in the enclosing docstring.
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            # neither parent has the file
            return True

    return deletionfromparent
595
595
596
596
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    # candidates: files listed as changed but absent from the changeset
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    # filter out absences inherited from a merge parent
    wrongly_removed = get_removal_filter(ctx)
    return [f for f in candidates if not wrongly_removed(f)]
607
607
608
608
def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    # only merge commits can have merged files
    if len(ctx.parents()) < 2:
        return []
    merged = []
    for f in ctx.files():
        if f not in ctx:
            continue
        fctx = ctx[f]
        file_parents = fctx._filelog.parents(fctx._filenode)
        # a filenode with a second parent is the result of a merge
        if file_parents[1] != ctx.repo().nullid:
            merged.append(f)
    return merged
621
621
622
622
def computechangesetcopies(ctx):
    """Return the copy information for a changeset.

    Produces a ``(p1copies, p2copies)`` pair of dictionaries, each in the
    form ``{newname: oldname}``, depending on which parent the copy source
    comes from.
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec or absent from the changeset
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copy_info = ctx[dst].renamed()
        if not copy_info:
            continue
        src, srcnode = copy_info
        # attribute the copy to whichever parent holds the matching filenode
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
647
647
648
648
def encodecopies(files, copies):
    """Serialize ``copies`` as newline-separated ``<index>\\0<source>`` items,
    where ``<index>`` is the position of the destination in ``files``.

    Raises ProgrammingError if a copy destination is missing from ``files``.
    """
    items = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
659
659
660
660
def decodecopies(files, data):
    """Parse data produced by ``encodecopies`` back into a ``{dst: src}`` dict.

    Returns None when the payload cannot be interpreted, e.g. when an
    extension stored unrelated data under the same extra key.
    """
    copies = {}
    if not data:
        return copies
    try:
        for item in data.split(b'\n'):
            strindex, src = item.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
676
676
677
677
def encodefileindices(files, subset):
    """Encode, one per line, the position in ``files`` of each file that is
    also part of ``subset``."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
685
685
686
686
def decodefileindices(files, data):
    """Decode data produced by ``encodefileindices`` into a file list.

    Returns None for out-of-range indices or an unparsable payload (e.g. an
    extension reused the same extra key with a different format).
    """
    subset = []
    if not data:
        return subset
    try:
        for chunk in data.split(b'\n'):
            index = int(chunk)
            if not (0 <= index < len(files)):
                return None
            subset.append(files[index])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
702
702
703
703
# see mercurial/helptext/internals/revlogs.txt for details about the format

# Bits 2-4 of a per-file flag byte encode the action performed on the file.
ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will appear as `000` for this
# mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
SALVAGED_FLAG = int("100" "00", 2)
TOUCHED_FLAG = int("101" "00", 2)

# The two lowest bits encode copy information; `00` means "not copied".
COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")
721
721
722
722
def encode_files_sidedata(files):
    """Serialize a ChangingFiles instance into the SD_FILES sidedata blob.

    Layout: a big-endian u32 file count, one INDEX_ENTRY
    (<flag><filename-end><copy-source-index>) per file, then all filenames
    concatenated. See mercurial/helptext/internals/revlogs.txt.
    """
    # copy sources may not themselves be touched, so gather them too
    all_files = set(files.touched)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    # entries without a copy source store index 0; the unset COPIED_MASK
    # bits distinguish them from a real copy from all_files[0]
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        # cumulative end offset of this filename in the names section
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.salvaged:
            flag |= SALVAGED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    # names section: all filenames back to back, delimited by the
    # per-entry end offsets recorded above
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}
760
760
761
761
def decode_files_sidedata(sidedata):
    """Rebuild a ChangingFiles instance from an SD_FILES sidedata blob.

    Inverse of ``encode_files_sidedata``; returns an empty ChangingFiles
    when no SD_FILES entry is present.
    """
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    # the concatenated filenames start right after the fixed-size entries
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        # `file_end` is stored relative to the start of the names section
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == SALVAGED_FLAG:
            md.mark_salvaged(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        # a copy source may appear later in the index, so resolve copies
        # only after all filenames are known
        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md
814
814
815
815
def _getsidedata(srcrepo, rev):
    """Compute the files sidedata for ``rev`` of ``srcrepo``.

    Returns a ``(sidedata_map, has_copies_info)`` pair.
    """
    changes = compute_all_files_changes(srcrepo[rev])
    return encode_files_sidedata(changes), changes.has_copies_info
820
820
821
821
def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
    """Sidedata computer recomputing copy information from scratch.

    ``existing_sidedata`` is ignored: the data is always rebuilt from the
    changeset itself.
    """
    sidedata, _has_copies_info = _getsidedata(repo, rev)
    return sidedata
824
824
825
825
def set_sidedata_spec_for_repo(repo):
    """Register the copies sidedata computer on repos that require it."""
    if requirementsmod.COPIESSDC_REQUIREMENT not in repo.requirements:
        return
    repo.register_wanted_sidedata(sidedatamod.SD_FILES)
    repo.register_sidedata_computer(
        revlogconst.KIND_CHANGELOG,
        sidedatamod.SD_FILES,
        (sidedatamod.SD_FILES,),
        copies_sidedata_computer,
    )
835
835
836
836
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that adds copies sidedata.

    The multiprocess implementation is used unless disabled by config or
    when running on Windows.
    """
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if use_w and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
843
843
844
844
845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
846 """The function used by worker precomputing sidedata
846 """The function used by worker precomputing sidedata
847
847
848 It read an input queue containing revision numbers
848 It read an input queue containing revision numbers
849 It write in an output queue containing (rev, <sidedata-map>)
849 It write in an output queue containing (rev, <sidedata-map>)
850
850
851 The `None` input value is used as a stop signal.
851 The `None` input value is used as a stop signal.
852
852
853 The `tokens` semaphore is user to avoid having too many unprocessed
853 The `tokens` semaphore is user to avoid having too many unprocessed
854 entries. The workers needs to acquire one token before fetching a task.
854 entries. The workers needs to acquire one token before fetching a task.
855 They will be released by the consumer of the produced data.
855 They will be released by the consumer of the produced data.
856 """
856 """
857 tokens.acquire()
857 tokens.acquire()
858 rev = revs_queue.get()
858 rev = revs_queue.get()
859 while rev is not None:
859 while rev is not None:
860 data = _getsidedata(srcrepo, rev)
860 data = _getsidedata(srcrepo, rev)
861 sidedata_queue.put((rev, data))
861 sidedata_queue.put((rev, data))
862 tokens.acquire()
862 tokens.acquire()
863 rev = revs_queue.get()
863 rev = revs_queue.get()
864 # processing of `None` is completed, release the token.
864 # processing of `None` is completed, release the token.
865 tokens.release()
865 tokens.release()
866
866
867
867
# Per-worker sizing factor for the token semaphore that bounds the number of
# unprocessed sidedata entries (see _get_worker_sidedata_adder).
BUFF_PER_WORKER = 50
869
869
870
870
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them.

    Returns a ``sidedata_companion(revlog, rev)`` callable suitable for the
    revlog upgrade machinery.
    """
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bounds the number of precomputed-but-unconsumed results
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers, one per worker
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        data = {}, False
        # Attribute names must be native strings on Python 3 (a bytes name
        # makes getattr raise TypeError); this also matches the simple,
        # non-worker variant of this check.
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            data = staging.pop(rev, None)
            if data is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, data = sidedataq.get()
                while r != rev:
                    staging[r] = data
                    r, data = sidedataq.get()
            # free one slot for the workers
            tokens.release()
        sidedata, has_copies_info = data
        new_flag = 0
        if has_copies_info:
            new_flag = sidedataflag.REVIDX_HASCOPIESINFO
        return False, (), sidedata, new_flag, 0

    return sidedata_companion
931
931
932
932
933 def _get_simple_sidedata_adder(srcrepo, destrepo):
933 def _get_simple_sidedata_adder(srcrepo, destrepo):
934 """The simple version of the sidedata computation
934 """The simple version of the sidedata computation
935
935
936 It just compute it in the same thread on request"""
936 It just compute it in the same thread on request"""
937
937
938 def sidedatacompanion(revlog, rev):
938 def sidedatacompanion(revlog, rev):
939 sidedata, has_copies_info = {}, False
939 sidedata, has_copies_info = {}, False
940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
942 new_flag = 0
942 new_flag = 0
943 if has_copies_info:
943 if has_copies_info:
944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
945
945
946 return False, (), sidedata, new_flag, 0
946 return False, (), sidedata, new_flag, 0
947
947
948 return sidedatacompanion
948 return sidedatacompanion
949
949
950
950
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion stripping copy-tracing sidedata."""

    def sidedatacompanion(revlog, rev):
        keys_to_drop = ()
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        if is_changelog and revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
            keys_to_drop = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, keys_to_drop, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now