changing-files: add clean computation of changed files for roots...
marmoute
r46258:f6811e5b default
@@ -1,650 +1,665 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files otherwise affected by the changeset

    and copy information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the
        changeset's parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor.)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified file that needed merging because it was modified on both
        sides.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing
        "new" content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively"
           removed by this changeset.

        b) If a file exists in only one parent and in none of the common
           ancestors, then the file was newly added in one of the merged
           branches and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
           ancestors using the same filenode, then the file was unchanged on
           one side and deleted on the other side. The merge "passively"
           propagated that deletion, but didn't "actively" remove the file.
           In this case the file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
           ancestors using a different filenode, then the file was changed on
           one side and removed on the other side. The merge process
           "actively" decided to drop the new change and delete the file.
           Unlike in the previous case, (c), the file is included in the
           `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
        (a)  | both              | *             || yes
        (b)  | one               | none          || yes
        (c)  | one               | same filenode || no
        (d)  | one               | new filenode  || yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the
        file still exists, its removal was "reverted" and the file is
        "salvaged".
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)


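# --- editor's illustration (not part of the changeset) ----------------------
# A minimal sketch of how the ChangingFiles API above fits together; the
# file names are hypothetical, and the assertions rely only on behaviour
# defined in this class (every mark_* also marks the file as touched, and
# the properties are frozen snapshots).
def _demo_changingfiles():
    mc = ChangingFiles()
    mc.mark_added(b'new.txt')
    mc.mark_removed(b'gone.txt')
    mc.mark_copied_from_p1(b'src.txt', b'new.txt')
    assert mc.added == {b'new.txt'}
    assert mc.removed == {b'gone.txt'}
    # every actively added/removed/merged file is also "touched"
    assert mc.touched == {b'new.txt', b'gone.txt'}
    assert mc.copied_from_p1 == {b'new.txt': b'src.txt'}
# -----------------------------------------------------------------------------

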
def compute_all_files_changes(ctx):
    """compute the files changed by a revision"""
+    p1 = ctx.p1()
+    p2 = ctx.p2()
+    if p1.rev() == node.nullrev and p2.rev() == node.nullrev:
+        return _process_root(ctx)
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    filesmerged = computechangesetfilesmerged(ctx)
    files = ChangingFiles()
    files.update_touched(ctx.files())
    files.update_added(filesadded)
    files.update_removed(filesremoved)
    files.update_merged(filesmerged)
    files.update_copies_from_p1(filescopies[0])
    files.update_copies_from_p2(filescopies[1])
    return files


+def _process_root(ctx):
+    """compute the appropriate changed files for a changeset with no parents
+    """
+    # Simple, there was nothing before it, so everything is added.
+    md = ChangingFiles()
+    manifest = ctx.manifest()
+    for filename in manifest:
+        md.mark_added(filename)
+    return md
+
+
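# --- editor's illustration (not part of the changeset) ----------------------
# What the new root handling above guarantees: for a parentless changeset,
# every file in the manifest ends up in `added` (and therefore in `touched`)
# while every other set stays empty. The ctx stand-in is hypothetical, since
# building a real repository here would be out of scope.
def _demo_process_root():
    class _rootctx(object):  # minimal stand-in: only what _process_root uses
        def manifest(self):
            return [b'a.txt', b'b.txt']

    md = _process_root(_rootctx())
    assert md.added == md.touched == {b'a.txt', b'b.txt'}
    assert not (md.removed | md.merged | md.salvaged)
# -----------------------------------------------------------------------------

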
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this function
    determines whether the absence is due to a deletion from a parent, or
    whether the merge commit itself deletes the file. We decide this by
    doing a simplified three way merge of the manifest entry for the file.
    There are two ways we decide the merge itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that parent has the same
      filelog entry as the merge ancestor (or all of them if there are two).
      In other words, that parent left the file unchanged while the other
      one deleted it.
    One way to think about this is that deleting a file is similar to
    emptying it, so the list of changed files should be similar either way.
    The computation described above is not done directly in _filecommit
    when creating the list of changed files, however it does something very
    similar by comparing filelog nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent


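# --- editor's illustration (not part of the changeset) ----------------------
# The decision rule of deletionfromparent() restated over plain dicts that
# map filename to filenode, covering case (c) of the `removed` docstring
# above: the file is unchanged on one side and deleted on the other, so the
# deletion is only "passively" propagated. The manifests are hypothetical.
def _demo_deletion_from_parent():
    m1 = {b'kept': b'node1'}           # p1 still has the file, unchanged
    m2 = {}                            # p2 deleted it
    ancestors = [{b'kept': b'node1'}]  # same filenode in the ancestor
    f = b'kept'
    if f in m1:
        passive = f not in m2 and all(
            f in ma and ma[f] == m1[f] for ma in ancestors
        )
    elif f in m2:
        passive = all(f in ma and ma[f] == m2[f] for ma in ancestors)
    else:
        passive = True
    assert passive  # i.e. the file is filtered out of the `removed` set
# -----------------------------------------------------------------------------

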
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset
    """
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != node.nullid:
                merged.append(f)
    return merged


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies,
    p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


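# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip of the index-based copy encoding above, on hypothetical file
# names: each entry stores the index of the destination in the file list,
# NUL-separated from the source name.
def _demo_copies_roundtrip():
    files = [b'a.txt', b'b.txt']
    copies = {b'b.txt': b'a.txt'}  # b.txt was copied from a.txt
    data = encodecopies(files, copies)
    assert data == b'1\x00a.txt'
    assert decodecopies(files, data) == copies
# -----------------------------------------------------------------------------

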
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


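# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip of the index encoding above: a subset of files is stored as
# newline-separated indices into the full file list; decoding returns the
# subset in file-list order.
def _demo_fileindices_roundtrip():
    files = [b'a', b'b', b'c']
    data = encodefileindices(files, [b'c', b'a'])
    assert data == b'0\n2'
    assert decodefileindices(files, data) == [b'a', b'c']
# -----------------------------------------------------------------------------

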
# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will appear as `000` for
# this mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
# `100` is reserved for future use
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")


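# --- editor's illustration (not part of the changeset) ----------------------
# How the two bit fields above combine in the single flag byte of an index
# entry: bits 2-4 carry the action, bits 0-1 the copy source, and each can
# be read back independently through its mask.
def _demo_flag_layout():
    flag = ADDED_FLAG | COPIED_FROM_P2_FLAG  # 0b00111
    assert flag & ACTION_MASK == ADDED_FLAG
    assert flag & COPIED_MASK == COPIED_FROM_P2_FLAG
# -----------------------------------------------------------------------------

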
def encode_files_sidedata(files):
    all_files = set(files.touched - files.salvaged)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}


def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md


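# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip through the sidedata encoding above, using a hypothetical
# changeset that adds new.txt (copied from old.txt in p1) and removes
# old.txt; ChangingFiles.__eq__ makes the check direct.
def _demo_sidedata_roundtrip():
    files = ChangingFiles(
        added={b'new.txt'},
        removed={b'old.txt'},
        p1_copies={b'new.txt': b'old.txt'},
    )
    assert decode_files_sidedata(encode_files_sidedata(files)) == files
# -----------------------------------------------------------------------------

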
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    files = compute_all_files_changes(ctx)
    return encode_files_sidedata(files)


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50


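# --- editor's illustration (not part of the changeset) ----------------------
# The back-pressure protocol used by _sidedata_worker above, shrunk to a
# single worker and a trivial payload; threads stand in for processes so the
# sketch stays self-contained.
def _demo_token_protocol():
    import queue
    import threading

    tokens = threading.BoundedSemaphore(2)  # at most 2 unconsumed results
    revs, results = queue.Queue(), queue.Queue()

    def worker():
        tokens.acquire()
        rev = revs.get()
        while rev is not None:
            results.put((rev, rev * 2))  # stand-in for _getsidedata()
            tokens.acquire()
            rev = revs.get()
        tokens.release()  # processing of `None` is completed

    for r in (0, 1, 2, None):  # tasks first, then the stop signal
        revs.put(r)
    t = threading.Thread(target=worker)
    t.start()
    for _ in range(3):
        rev, data = results.get()
        tokens.release()  # the consumer frees one token per result
    t.join()
# -----------------------------------------------------------------------------

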
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need it"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the
    # overhead of using a single queue is not relevant. In practice, most
    # computations are fast but some are very expensive and dominate all the
    # other smaller costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes the sidedata in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion