metadata: filter the `removed` set to only contain relevant data...

marmoute
r45468:25512a65 default
@@ -1,324 +1,327 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)

def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three-way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent
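
The decision rule above is easy to illustrate with plain dicts standing in for manifests. A minimal sketch with made-up data (real manifests compare `(node, flags)` pairs via `find`, not raw dict values):

m1 = {b'a': b'node-a1'}           # p1 still carries the file, unchanged
m2 = {}                           # p2 deleted it
ancestors = [{b'a': b'node-a1'}]  # the merge ancestor matches p1's entry

def deletion_from_parent(f):
    # mirrors `deletionfromparent`, with dict lookups instead of `find`
    if f in m1:
        return f not in m2 and all(
            f in ma and ma[f] == m1[f] for ma in ancestors
        )
    elif f in m2:
        return all(f in ma and ma[f] == m2[f] for ma in ancestors)
    else:
        return True

assert deletion_from_parent(b'a')  # p1 left the file alone; p2 deleted it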


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
+    if removed:
+        rf = get_removal_filter(ctx)
+        removed = [r for r in removed if not rf(r)]
    return removed
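
The three added lines are the point of this change: files that only look removed because a parent deleted them are now filtered out before being recorded. A tiny sketch of the filtering step, with a hypothetical stand-in filter (the real `rf` comes from `get_removal_filter` above):

removed = [b'really-removed.txt', b'deleted-by-a-parent.txt']
rf = lambda f: f == b'deleted-by-a-parent.txt'  # True means "not a real removal"
assert [r for r in removed if not rf(r)] == [b'really-removed.txt']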


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
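
A hedged round-trip sketch of the encoding (hypothetical file names): each entry is `<index>\0<source>` where `<index>` points into the sorted file list, and entries are joined with newlines:

files = [b'copied.txt', b'untouched.txt']
copies = {b'copied.txt': b'original.txt'}
data = encodecopies(files, copies)
assert data == b'0\x00original.txt'
assert decodecopies(files, data) == copies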


def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
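
The equivalent round trip for file indices (again with hypothetical names): the subset is stored as newline-separated positions into the sorted file list:

files = [b'a.txt', b'b.txt', b'c.txt']
data = encodefileindices(files, [b'a.txt', b'c.txt'])
assert data == b'0\n2'
assert decodefileindices(files, data) == [b'a.txt', b'c.txt']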


def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. A worker needs to acquire one token before fetching a task.
    Tokens are released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()
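
The token dance is easy to get wrong, so here is a self-contained sketch of the same bounded producer/consumer pattern, with a toy computation in place of `_getsidedata` (everything below is hypothetical, not Mercurial API):

import multiprocessing

def _square_worker(tasks, results, tokens):
    tokens.acquire()                    # one token per unconsumed result
    item = tasks.get()
    while item is not None:
        results.put((item, item * item))
        tokens.acquire()
        item = tasks.get()
    tokens.release()                    # the last acquire fetched `None`, give it back

if __name__ == '__main__':
    tokens = multiprocessing.BoundedSemaphore(4)  # at most 4 results in flight
    tasks = multiprocessing.Queue()
    results = multiprocessing.Queue()
    for i in range(8):
        tasks.put(i)
    tasks.put(None)                     # stop signal
    w = multiprocessing.Process(target=_square_worker, args=(tasks, results, tokens))
    w.start()
    for _ in range(8):
        item, square = results.get()
        tokens.release()                # free a slot for the worker
    w.join()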


BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need it"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
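
The shelving logic in `sidedata_companion` is a generic trick for consuming an out-of-order stream in order. A minimal single-process sketch with stand-in data and a plain `queue.Queue`:

import queue

sidedataq = queue.Queue()
for pair in [(43, b'data-43'), (41, b'data-41'), (42, b'data-42')]:
    sidedataq.put(pair)            # results may arrive out of order

staging = {}

def get_for(rev):
    data = staging.pop(rev, None)  # previously shelved?
    if data is None:
        r, data = sidedataq.get()
        while r != rev:            # shelve the others until ours shows up
            staging[r] = data
            r, data = sidedataq.get()
    return data

assert [get_for(r) for r in (41, 42, 43)] == [b'data-41', b'data-42', b'data-43']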


def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion