##// END OF EJS Templates
changing-files: implement equality checking...
marmoute -
r46079:df878210 default
parent child Browse files
Show More
@@ -1,479 +1,488 b''
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
class ChangingFiles(object):
    """A class recording the changes made to a file by a changeset

    Actions performed on files are gathered into 3 sets:

    - added: files actively added in the changeset.
    - removed: files removed in the revision
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
    ):
        self._added = set(added)
        self._removed = set(removed)
        # `touched` is kept a superset of both `added` and `removed`
        self._touched = set(touched)
        self._touched.update(self._added)
        self._touched.update(self._removed)
        self._p1_copies = dict(p1_copies)
        self._p2_copies = dict(p2_copies)

    def __eq__(self, other):
        # Comparing against an unrelated type would raise AttributeError;
        # returning NotImplemented instead lets Python fall back to its
        # default handling (identity comparison).
        if not isinstance(other, ChangingFiles):
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    def __ne__(self, other):
        # Python 2 does not derive `!=` from __eq__, so spell it out for
        # the py2/py3 dual support this module targets.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    @property
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @property
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||   yes
         (b) | one               |     none      ||   yes
         (c) | one               | same filenode ||   no
         (d) | one               |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @property
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @property
    def copied_from_p1(self):
        # return a copy so callers cannot mutate our internal state
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        # return a copy so callers cannot mutate our internal state
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
151
160
152
161
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is touched by the changeset and absent
    from every parent.
    """
    return [
        f
        for f in ctx.files()
        if all(f not in p for p in ctx.parents())
    ]
161
170
162
171
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this function decides
    whether the absence comes from a parent's deletion or from the merge
    commit itself, via a simplified three-way merge of the manifest entry.
    The merge itself did *not* delete a file when either:
    - neither parent (nor the merge) contains the file, or
    - exactly one parent contains the file with the same filelog entry as
      the merge ancestor(s) — i.e. that parent left the file untouched
      while the other side deleted it.
    Deleting a file is similar to emptying it, so the changed-files list
    should look the same either way; `_filecommit` performs a very similar
    filelog-node comparison when building that list.
    """

    if x is None:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()
    else:
        p1, p2, m1, m2 = x

    @util.cachefunc
    def mas():
        # manifests of the greatest common ancestors (nullrev if none)
        heads = ctx.repo().changelog.commonancestorsheads(p1.node(), p2.node())
        if not heads:
            heads = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in heads]

    def deletionfromparent(f):
        if f in m1:
            if f in m2:
                return False
            return all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        if f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        return True

    return deletionfromparent
216
225
217
226
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    Candidates are the touched files absent from the changeset; merges then
    filter out absences merely inherited from a parent (see
    ``get_removal_filter``).
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    inherited = get_removal_filter(ctx)
    return [f for f in candidates if not inherited(f)]
229
238
230
239
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copies_p1 = {}
    copies_p2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    matcher = ctx._repo.narrowmatch()
    for dest in ctx.files():
        # skip files outside the narrow spec and files absent from ctx
        if not matcher(dest) or dest not in ctx:
            continue
        rename = ctx[dest].renamed()
        if rename:
            source, source_node = rename
            # attribute the copy to whichever parent holds the exact
            # source filenode
            if source in parent1 and parent1[source].filenode() == source_node:
                copies_p1[dest] = source
            elif (
                source in parent2
                and parent2[source].filenode() == source_node
            ):
                copies_p2[dest] = source
    return copies_p1, copies_p2
255
264
256
265
def encodecopies(files, copies):
    """Encode ``copies`` as newline-separated ``<index>\\0<source>`` entries.

    ``<index>`` is the position of the copy destination within ``files``.
    Raises ProgrammingError if some copy destination is not in ``files``.
    """
    encoded = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(encoded) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(encoded)
267
276
268
277
def decodecopies(files, data):
    """Decode the output of ``encodecopies`` back into ``{dest: source}``.

    Returns None when ``data`` does not parse — e.g. the extra field reused
    the same key name (such as "p1copies") with a different value syntax.
    """
    if not data:
        return {}
    try:
        decoded = {}
        for entry in data.split(b'\n'):
            rawindex, source = entry.split(b'\0')
            decoded[files[int(rawindex)]] = source
        return decoded
    except (ValueError, IndexError):
        return None
284
293
285
294
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined decimal indices of its
    members within ``files`` (in ``files`` order)."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in wanted
    )
293
302
294
303
def decodefileindices(files, data):
    """Decode the output of ``encodefileindices`` back into a file list.

    Returns None on out-of-range indices or unparsable input — e.g. the
    extra field reused the same key name (such as "added") with a
    different value syntax.
    """
    if not data:
        return []
    try:
        decoded = []
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            if not (0 <= idx < len(files)):
                return None
            decoded.append(files[idx])
        return decoded
    except (ValueError, IndexError):
        return None
310
319
311
320
def encode_copies_sidedata(files):
    """Build the sidedata mapping for a ``ChangingFiles``-like object.

    Only non-empty entries are stored; returns None when nothing needs to
    be recorded at all.
    """
    # indices in the encoded entries refer to the sorted touched files
    sortedfiles = sorted(files.touched)
    sidedata = {}
    p1copies = files.copied_from_p1
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(sortedfiles, p1copies)
    p2copies = files.copied_from_p2
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(sortedfiles, p2copies)
    added = files.added
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            sortedfiles, added
        )
    removed = files.removed
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            sortedfiles, removed
        )
    return sidedata or None
334
343
335
344
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata map for revision ``rev``.

    Returns a (possibly empty) dict keyed by sidedata constants.
    """
    ctx = srcrepo[rev]
    copies = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if not any([copies, added, removed]):
        return sidedata
    # encoded indices refer to the sorted file list of the changeset
    allfiles = sorted(ctx.files())
    p1copies, p2copies = copies
    p1copies = encodecopies(allfiles, p1copies)
    p2copies = encodecopies(allfiles, p2copies)
    added = encodefileindices(allfiles, added)
    removed = encodefileindices(allfiles, removed)
    # only store non-empty encodings
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = p1copies
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = p2copies
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = added
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = removed
    return sidedata
358
367
359
368
def getsidedataadder(srcrepo, destrepo):
    """Pick the sidedata-adding companion: parallel when workers are
    enabled (and not on Windows), in-thread otherwise."""
    use_workers = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if use_workers and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
366
375
367
376
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """Worker loop precomputing sidedata.

    Reads revision numbers from ``revs_queue`` and writes
    ``(rev, <sidedata-map>)`` pairs to ``sidedata_queue``.  ``None`` on the
    input queue is the stop signal.

    The ``tokens`` semaphore bounds the number of unprocessed entries: a
    token is acquired before fetching each task and released by the
    consumer of the produced data.
    """
    tokens.acquire()
    while True:
        rev = revs_queue.get()
        if rev is None:
            break
        sidedata_queue.put((rev, _getsidedata(srcrepo, rev)))
        tokens.acquire()
    # the `None` task has been consumed; give its token back
    tokens.release()
389
398
390
399
# Per-worker buffering budget: the producer/consumer semaphore in
# `_get_worker_sidedata_adder` is sized to nbworkers * BUFF_PER_WORKER,
# bounding how many precomputed sidedata entries may be in flight.
BUFF_PER_WORKER = 50
392
401
393
402
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawn a pool of worker that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of precomputed-but-unconsumed entries
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it make
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not revelant. In practice, most computation
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers, one per worker
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionnary to store results for revision higher than we one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, when shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        """Return (False, (), sidedata) for ``rev``; sidedata is only
        computed for the changelog."""
        sidedata = {}
        # NOTE: the attribute name must be a native str — on Python 3
        # getattr() raises TypeError for a bytes name, so the previous
        # b'filteredrevs' form could never match (and was inconsistent
        # with the str form used by the other companions in this module).
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued result until we find the one we are lookig
                # for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            # one consumed entry: let a worker fetch another task
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
450
459
451
460
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just compute it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        """Return (False, (), sidedata) for ``rev``; sidedata is only
        computed for the changelog."""
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        sidedata = _getsidedata(srcrepo, rev) if is_changelog else {}
        return False, (), sidedata

    return sidedatacompanion
464
473
465
474
def getsidedataremover(srcrepo, destrepo):
    """Return a companion that strips copy-tracing sidedata entries from
    changelog revisions that carry them."""

    def sidedatacompanion(revlog, rev):
        keys_to_drop = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                keys_to_drop = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, keys_to_drop, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now