##// END OF EJS Templates
changing-files: add the ability to track merged files too...
marmoute -
r46186:e5578dbe default
parent child Browse files
Show More
@@ -1,519 +1,542 b''
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 4 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        # `touched` is kept as a superset of the three action sets
        self._touched = set(() if touched is None else touched)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        if not isinstance(other, ChangingFiles):
            # Delegate to the other operand rather than raising
            # AttributeError while reading `other.added` & co.
            return NotImplemented
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @property
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        self._merged.add(filename)
        self._touched.add(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @property
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||  yes
         (b) | one               |     none      ||  yes
         (c) | one               | same filenode ||  no
         (d) | one               |  new filenode ||  yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @property
    def touched(self):
        """files either actively modified, added, merged or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @property
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
165
188
166
189
def computechangesetfilesadded(ctx):
    """Return the list of files the changeset actively adds.

    A file counts as added when none of the changeset's parents contain it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
175
198
176
199
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        # the caller precomputed the parents and their manifests for us
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        # manifests of the merge ancestors (common ancestor heads of the
        # two parents); cached so they are computed at most once
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            # no common ancestor: fall back to the null revision so the
            # comprehension below still yields a (empty) manifest
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        # Return True when the absence of `f` is inherited from a parent
        # rather than being an "active" removal by the merge itself.
        if f in m1:
            # f survives in p1: a passive deletion requires f to be gone
            # from p2 while p1 kept it identical to every merge ancestor
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            # symmetric case: p2 kept f unchanged, p1 deleted it
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            # absent from both parents: the merge did not delete anything
            return True

    return deletionfromparent
230
253
231
254
def computechangesetfilesremoved(ctx):
    """Return the list of files the changeset actively removes.

    Candidates are the changed files absent from the changeset itself;
    deletions merely inherited from a parent during a merge are filtered
    out with the predicate built by `get_removal_filter`.
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if candidates:
        inherited = get_removal_filter(ctx)
        candidates = [f for f in candidates if not inherited(f)]
    return candidates
243
266
244
267
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The copies data are returned as a pair of dictionaries
    ``(p1copies, p2copies)``, each of the form ``{newname: oldname}``.
    """
    copies_p1 = {}
    copies_p2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # only consider files inside the narrow spec that still exist
        if narrowmatch(dst) and dst in ctx:
            renamed = ctx[dst].renamed()
            if renamed:
                src, srcnode = renamed
                # attribute the copy to whichever parent holds the exact
                # source filenode
                if src in parent1 and parent1[src].filenode() == srcnode:
                    copies_p1[dst] = src
                elif src in parent2 and parent2[src].filenode() == srcnode:
                    copies_p2[dst] = src
    return copies_p1, copies_p2
269
292
270
293
def encodecopies(files, copies):
    """Encode a ``{dest: source}`` copies dict against the `files` list.

    Each entry becomes ``b"<index-of-dest>\\0<source>"``; entries are
    joined with newlines, ordered by the destination's index in `files`.
    Raises ProgrammingError when a copy target is absent from `files`.
    """
    entries = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
281
304
282
305
def decodecopies(files, data):
    """Decode a copies blob back into a ``{dest: source}`` dict.

    `data` holds newline-separated ``b"<index>\\0<source>"`` entries whose
    indices refer into `files`. Returns ``None`` when the payload cannot
    be parsed.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            rawindex, source = line.split(b'\0')
            copies[files[int(rawindex)]] = source
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies")
        # and used different syntax for the value.
        return None
    return copies
298
321
299
322
def encodefileindices(files, subset):
    """Encode `subset` as newline-separated decimal indices into `files`."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in members
    )
307
330
308
331
def decodefileindices(files, data):
    """Decode a newline-separated index blob into matching file names.

    Returns ``None`` for out-of-range indices or unparsable payloads.
    """
    try:
        if not data:
            return []
        names = []
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            if not (0 <= idx < len(files)):
                return None
            names.append(files[idx])
        return names
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
324
347
325
348
def encode_files_sidedata(files):
    """Serialize a ChangingFiles-like object into a sidedata mapping.

    Only non-empty categories are stored; returns ``None`` when there is
    nothing to record.
    """
    allfiles = sorted(files.touched)
    sidedata = {}
    copies_p1 = files.copied_from_p1
    if copies_p1:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(allfiles, copies_p1)
    copies_p2 = files.copied_from_p2
    if copies_p2:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(allfiles, copies_p2)
    added = files.added
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            allfiles, added
        )
    removed = files.removed
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            allfiles, removed
        )
    return sidedata or None
348
371
349
372
def decode_files_sidedata(changelogrevision, sidedata):
    """Build a ChangingFiles instance from a changelogrevision's sidedata."""
    touched = changelogrevision.files
    # keyword arguments are evaluated left to right, preserving the
    # original decode order (added, removed, p1 copies, p2 copies)
    return ChangingFiles(
        touched=touched,
        added=decodefileindices(
            touched, sidedata.get(sidedatamod.SD_FILESADDED)
        ),
        removed=decodefileindices(
            touched, sidedata.get(sidedatamod.SD_FILESREMOVED)
        ),
        p1_copies=decodecopies(
            touched, sidedata.get(sidedatamod.SD_P1COPIES)
        ),
        p2_copies=decodecopies(
            touched, sidedata.get(sidedatamod.SD_P2COPIES)
        ),
    )
374
397
375
398
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata map for revision `rev`."""
    ctx = srcrepo[rev]
    copies = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([copies, added, removed]):
        allfiles = sorted(ctx.files())
        p1copies, p2copies = copies
        # encode everything first, then only store the non-empty blobs
        p1copies = encodecopies(allfiles, p1copies)
        p2copies = encodecopies(allfiles, p2copies)
        added = encodefileindices(allfiles, added)
        removed = encodefileindices(allfiles, removed)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if added:
            sidedata[sidedatamod.SD_FILESADDED] = added
        if removed:
            sidedata[sidedatamod.SD_FILESREMOVED] = removed
    return sidedata
398
421
399
422
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that adds copy information.

    The parallel implementation is used unless workers are disabled by
    config or we run on Windows.
    """
    use_workers = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if use_workers and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
406
429
407
430
408 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
431 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
409 """The function used by worker precomputing sidedata
432 """The function used by worker precomputing sidedata
410
433
411 It read an input queue containing revision numbers
434 It read an input queue containing revision numbers
412 It write in an output queue containing (rev, <sidedata-map>)
435 It write in an output queue containing (rev, <sidedata-map>)
413
436
414 The `None` input value is used as a stop signal.
437 The `None` input value is used as a stop signal.
415
438
416 The `tokens` semaphore is user to avoid having too many unprocessed
439 The `tokens` semaphore is user to avoid having too many unprocessed
417 entries. The workers needs to acquire one token before fetching a task.
440 entries. The workers needs to acquire one token before fetching a task.
418 They will be released by the consumer of the produced data.
441 They will be released by the consumer of the produced data.
419 """
442 """
420 tokens.acquire()
443 tokens.acquire()
421 rev = revs_queue.get()
444 rev = revs_queue.get()
422 while rev is not None:
445 while rev is not None:
423 data = _getsidedata(srcrepo, rev)
446 data = _getsidedata(srcrepo, rev)
424 sidedata_queue.put((rev, data))
447 sidedata_queue.put((rev, data))
425 tokens.acquire()
448 tokens.acquire()
426 rev = revs_queue.get()
449 rev = revs_queue.get()
427 # processing of `None` is completed, release the token.
450 # processing of `None` is completed, release the token.
428 tokens.release()
451 tokens.release()
429
452
430
453
# Per-worker share of the bounded result buffer: the token semaphore in
# _get_worker_sidedata_adder is sized to nbworkers * BUFF_PER_WORKER,
# capping how many precomputed sidedata entries may sit unconsumed.
BUFF_PER_WORKER = 50
432
455
433
456
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of unconsumed results sitting in `sidedataq`
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        # NOTE: fixed from b'filteredrevs' — getattr() on Python 3 requires
        # a str attribute name, and the two sibling companions already use
        # the str form.
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued result until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
490
513
491
514
492 def _get_simple_sidedata_adder(srcrepo, destrepo):
515 def _get_simple_sidedata_adder(srcrepo, destrepo):
493 """The simple version of the sidedata computation
516 """The simple version of the sidedata computation
494
517
495 It just compute it in the same thread on request"""
518 It just compute it in the same thread on request"""
496
519
497 def sidedatacompanion(revlog, rev):
520 def sidedatacompanion(revlog, rev):
498 sidedata = {}
521 sidedata = {}
499 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
522 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
500 sidedata = _getsidedata(srcrepo, rev)
523 sidedata = _getsidedata(srcrepo, rev)
501 return False, (), sidedata
524 return False, (), sidedata
502
525
503 return sidedatacompanion
526 return sidedatacompanion
504
527
505
528
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy information."""

    def companion(revlog, rev):
        # by default nothing is removed
        dropped = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                dropped = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, dropped, {}

    return companion
General Comments 0
You need to be logged in to leave comments. Login now