##// END OF EJS Templates
changing-files: cache the various property...
marmoute -
r46198:d3148337 default
parent child Browse files
Show More
@@ -1,557 +1,569 b''
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 4 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        # any actively added, merged or removed file is also "touched"
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        # invalidate the cached frozenset before mutating the backing set
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed
        merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        # invalidate the cached frozenset before mutating the backing set
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |       one         |     none      ||   yes
         (c) |       one         | same filenode ||   no
         (d) |       one         |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        # invalidate the cached frozenset before mutating the backing set
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        # invalidate the cached frozenset before mutating the backing set
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        """{"<new-name>": "<source-name-in-p1>"} mapping for p1 copies"""
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        # invalidate the cached copy before mutating the backing dict
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        """{"<new-name>": "<source-name-in-p2>"} mapping for p2 copies"""
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        # invalidate the cached copy before mutating the backing dict
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
188
200
189
201
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file is "added" when it exists in the revision but in none of the
    revision's parents.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
198
210
199
211
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is None:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()
    else:
        p1, p2, m1, m2 = x

    @util.cachefunc
    def mas():
        # manifests of the common ancestor heads (nullrev when unrelated)
        cahs = ctx.repo().changelog.commonancestorsheads(p1.node(), p2.node())
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            if f in m2:
                return False
            return all(f in ma and ma.find(f) == m1.find(f) for ma in mas())
        if f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        return True

    return deletionfromparent
253
265
254
266
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    Candidates are the touched files absent from the revision; merge-inherited
    deletions are then filtered out via `get_removal_filter`.
    """
    removed = [f for f in ctx.files() if f not in ctx]
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed
266
278
267
279
def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset

    A file is "merged" when its filelog entry in this revision has two
    parents; only merge changesets (two parents) can contain such files.
    """
    if len(ctx.parents()) < 2:
        return []
    merged = []
    for f in ctx.files():
        if f not in ctx:
            continue
        fctx = ctx[f]
        if fctx._filelog.parents(fctx._filenode)[1] != node.nullid:
            merged.append(f)
    return merged
281
293
282
294
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # only consider files inside the narrowspec that exist in the revision
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename_info = ctx[dst].renamed()
        if not rename_info:
            continue
        src, srcnode = rename_info
        # attribute the copy to whichever parent holds the matching filenode
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
307
319
308
320
def encodecopies(files, copies):
    """encode a {dest: source} copy mapping relative to a sorted file list

    Each entry is encoded as `<index>\\0<source>` where <index> is the
    destination's position in `files`; entries are joined with newlines.
    Raises ProgrammingError when a copy destination is missing from `files`.
    """
    entries = [
        b'%d\0%s' % (idx, copies[f])
        for idx, f in enumerate(files)
        if f in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
319
331
320
332
def decodecopies(files, data):
    """decode a copy mapping produced by `encodecopies`

    Returns `{dest: source}` or None when the payload does not parse —
    perhaps someone had chosen the same key name (e.g. "p1copies") and
    used different syntax for the value.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        return None
    return copies
336
348
337
349
def encodefileindices(files, subset):
    """encode a subset of `files` as newline-separated decimal indices"""
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in members
    )
345
357
346
358
def decodefileindices(files, data):
    """decode indices produced by `encodefileindices` back into file names

    Returns the selected file list, or None when an index is out of range
    or unparsable — perhaps someone had chosen the same key name
    (e.g. "added") and used different syntax for the value.
    """
    subset = []
    if not data:
        return subset
    try:
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            if not (0 <= idx < len(files)):
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        return None
    return subset
362
374
363
375
def encode_files_sidedata(files):
    """turn a ChangingFiles instance into a sidedata mapping

    Only non-empty entries are stored; returns None when nothing needs
    to be recorded.
    """
    sortedfiles = sorted(files.touched)
    sidedata = {}
    p1copies = files.copied_from_p1
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(sortedfiles, p1copies)
    p2copies = files.copied_from_p2
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(sortedfiles, p2copies)
    filesadded = files.added
    if filesadded:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            sortedfiles, filesadded
        )
    filesremoved = files.removed
    if filesremoved:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            sortedfiles, filesremoved
        )
    return sidedata or None
386
398
387
399
def decode_files_sidedata(changelogrevision, sidedata):
    """Return a ChangingFiles instance from a changelogrevision using sidedata"""
    touched = changelogrevision.files

    added = decodefileindices(
        touched, sidedata.get(sidedatamod.SD_FILESADDED)
    )
    removed = decodefileindices(
        touched, sidedata.get(sidedatamod.SD_FILESREMOVED)
    )
    p1_copies = decodecopies(touched, sidedata.get(sidedatamod.SD_P1COPIES))
    p2_copies = decodecopies(touched, sidedata.get(sidedatamod.SD_P2COPIES))

    return ChangingFiles(
        touched=touched,
        added=added,
        removed=removed,
        p1_copies=p1_copies,
        p2_copies=p2_copies,
    )
412
424
413
425
def _getsidedata(srcrepo, rev):
    """compute the copy-tracing sidedata map for revision `rev`"""
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        encoded = [
            (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
            (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
            (
                sidedatamod.SD_FILESADDED,
                encodefileindices(sortedfiles, filesadded),
            ),
            (
                sidedatamod.SD_FILESREMOVED,
                encodefileindices(sortedfiles, filesremoved),
            ),
        ]
        # only store the non-empty payloads
        for key, value in encoded:
            if value:
                sidedata[key] = value
    return sidedata
436
448
437
449
def getsidedataadder(srcrepo, destrepo):
    """pick the sidedata-adding companion suited to the platform and config

    The multiprocessing-based variant is only used when explicitly enabled
    and never on Windows.
    """
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if use_w and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
444
456
445
457
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by worker precomputing sidedata

    It reads an input queue containing revision numbers
    It writes to an output queue containing (rev, <sidedata-map>)

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. The workers need to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    while True:
        tokens.acquire()
        rev = revs_queue.get()
        if rev is None:
            break
        sidedata_queue.put((rev, _getsidedata(srcrepo, rev)))
    # processing of `None` is completed, release the token.
    tokens.release()
467
479
468
480
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of precomputed-but-unconsumed entries
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers, one per worker
    for _ in range(nbworkers):
        revsq.put(None)

    allworkers = [
        multiprocessing.Process(
            target=_sidedata_worker, args=(srcrepo, revsq, sidedataq, tokens)
        )
        for _ in range(nbworkers)
    ]
    for w in allworkers:
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
528
540
529
541
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def companion(revlog, rev):
        sidedata = {}
        # only the changelog carries the copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return companion
542
554
543
555
def getsidedataremover(srcrepo, destrepo):
    """return a companion that strips copy-tracing sidedata from a changelog"""

    def sidedatacompanion(revlog, rev):
        removals = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # only revisions flagged as carrying sidedata need stripping
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                removals = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removals, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now