salvaged: explicitly skip salvaged file while encoding...
marmoute
r46234:3bfa7c7f default draft
@@ -1,644 +1,644 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the changeset

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """
    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the
        changeset's parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)
    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified file which had modifications on both sides that needed
        merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)
    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing
        "new" content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively"
        removed by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file *is* included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |        one        |     none      ||   yes
         (c) |        one        | same filenode ||   no
         (d) |        one        | new filenode  ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)
    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged".
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)

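# A minimal usage sketch (illustrative file names, not part of the module):
# the mark_added/mark_merged/mark_removed/mark_salvaged helpers also record
# the file as touched, so `touched` covers those four sets.
#
#   >>> cf = ChangingFiles()
#   >>> cf.mark_added(b'new.txt')
#   >>> cf.mark_salvaged(b'kept.txt')
#   >>> sorted(cf.touched)
#   [b'kept.txt', b'new.txt']
#   >>> cf.added
#   frozenset({b'new.txt'})
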
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added

def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent

def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != node.nullid:
                merged.append(f)
    return merged

def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies

def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None

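# Round-trip sketch for the copy encoding above (illustrative values): each
# entry is "<index-into-files>\0<source-name>", joined with newlines.
#
#   >>> files = [b'a', b'b', b'c']
#   >>> data = encodecopies(files, {b'b': b'a'})
#   >>> data
#   b'1\x00a'
#   >>> decodecopies(files, data)
#   {b'b': b'a'}
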
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None

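# Same idea for file-index subsets (illustrative values): indices into the
# `files` list are stored as newline-separated decimal strings.
#
#   >>> files = [b'a', b'b', b'c']
#   >>> encodefileindices(files, [b'b', b'c'])
#   b'1\n2'
#   >>> decodefileindices(files, b'1\n2')
#   [b'b', b'c']
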
# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will read as `000` for this
# mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
# `100` is reserved for future use
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")

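# Bit-layout sketch (illustrative values): the action code lives in bits 2-4
# and the copy origin in bits 0-1, so a single flag byte combines both.
#
#   >>> flag = ADDED_FLAG | COPIED_FROM_P1_FLAG  # 0b00110
#   >>> flag & ACTION_MASK == ADDED_FLAG
#   True
#   >>> flag & COPIED_MASK == COPIED_FROM_P1_FLAG
#   True
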
def encode_files_sidedata(files):
-    all_files = set(files.touched)
+    all_files = set(files.touched - files.salvaged)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}

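# Effect of the change above (illustrative values): a file marked salvaged is
# in `touched` but is now excluded from the encoded index entirely.
#
#   >>> md = ChangingFiles()
#   >>> md.mark_added(b'a')
#   >>> md.mark_salvaged(b'b')
#   >>> raw = encode_files_sidedata(md)[sidedatamod.SD_FILES]
#   >>> INDEX_HEADER.unpack_from(raw, 0)[0]  # only b'a' was encoded
#   1
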
def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md

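# Decode round-trip sketch (illustrative values): actions and copies survive,
# while salvaged files are absent since this revision skips them at encode
# time.
#
#   >>> md = ChangingFiles()
#   >>> md.mark_added(b'a')
#   >>> md.mark_copied_from_p1(b'p1-src', b'a')
#   >>> md2 = decode_files_sidedata(encode_files_sidedata(md))
#   >>> sorted(md2.added), md2.copied_from_p1
#   ([b'a'], {b'a': b'p1-src'})
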
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    filesmerged = computechangesetfilesmerged(ctx)
    files = ChangingFiles()
    files.update_touched(ctx.files())
    files.update_added(filesadded)
    files.update_removed(filesremoved)
    files.update_merged(filesmerged)
    files.update_copies_from_p1(filescopies[0])
    files.update_copies_from_p2(filescopies[1])
    return encode_files_sidedata(files)


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)

def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker to precompute sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. A worker needs to acquire one token before fetching a task.
    Tokens will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50

def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion

def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion

630
630
631 def getsidedataremover(srcrepo, destrepo):
631 def getsidedataremover(srcrepo, destrepo):
632 def sidedatacompanion(revlog, rev):
632 def sidedatacompanion(revlog, rev):
633 f = ()
633 f = ()
634 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
634 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
635 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
635 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
636 f = (
636 f = (
637 sidedatamod.SD_P1COPIES,
637 sidedatamod.SD_P1COPIES,
638 sidedatamod.SD_P2COPIES,
638 sidedatamod.SD_P2COPIES,
639 sidedatamod.SD_FILESADDED,
639 sidedatamod.SD_FILESADDED,
640 sidedatamod.SD_FILESREMOVED,
640 sidedatamod.SD_FILESREMOVED,
641 )
641 )
642 return False, f, {}
642 return False, f, {}
643
643
644 return sidedatacompanion
644 return sidedatacompanion