##// END OF EJS Templates
changing-files: implement equality checking...
marmoute -
r46079:df878210 default
parent child Browse files
Show More
@@ -1,479 +1,488 b''
1 1 # metadata.py -- code related to various metadata computation and access.
2 2 #
3 3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import multiprocessing
11 11
12 12 from . import (
13 13 error,
14 14 node,
15 15 pycompat,
16 16 util,
17 17 )
18 18
19 19 from .revlogutils import (
20 20 flagutil as sidedataflag,
21 21 sidedata as sidedatamod,
22 22 )
23 23
24 24
class ChangingFiles(object):
    """A class recording the changes made to a file by a changeset

    Actions performed on files are gathered into 3 sets:

    - added: files actively added in the changeset.
    - removed: files removed in the revision
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
    ):
        self._added = set(added)
        self._removed = set(removed)
        self._touched = set(touched)
        # added and removed files are touched by definition
        self._touched.update(self._added)
        self._touched.update(self._removed)
        self._p1_copies = dict(p1_copies)
        self._p2_copies = dict(p2_copies)

    def __eq__(self, other):
        if not isinstance(other, ChangingFiles):
            # Returning NotImplemented (instead of blindly reading the other
            # operand's attributes and raising AttributeError) lets Python
            # fall back to the other operand's comparison or to identity.
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`; keep them consistent.
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return eq
        return not eq

    @property
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @property
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||  yes
         (b) | one               |     none      ||  yes
         (c) | one               | same filenode ||  no
         (d) | one               |  new filenode ||  yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @property
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @property
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
151 160
152 161
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    return [
        f
        for f in ctx.files()
        if not any(f in p for p in ctx.parents())
    ]
161 170
162 171
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        # the caller precomputed (p1, p2, m1, m2); reuse it instead of
        # re-deriving the parent contexts and manifests from `ctx`
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        # manifests of the greatest common ancestors of the two parents,
        # cached so they are computed at most once per filter
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            # NOTE(review): `node.nullrev` (a revision number) is mixed into
            # what is otherwise a list of nodes; `repo[...]` below appears to
            # accept either — confirm this is intentional.
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        # Return True when `f`'s absence is merely inherited from a parent
        # (cases the merge did not "actively" decide; see the docstring).
        if f in m1:
            # present in p1: a passive deletion requires it absent from p2
            # and unchanged (same manifest entry) in every common ancestor
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            # present only in p2: passive iff unchanged vs. every ancestor
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            # absent from both parents: nothing for the merge to delete
            return True

    return deletionfromparent
216 225
217 226
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # candidates: touched files no longer present in the changeset
    removed = [f for f in ctx.files() if f not in ctx]
    if removed:
        # drop entries whose absence was merely inherited from a parent
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed
229 238
230 239
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    match = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrow spec or absent from this revision
        if not match(dst):
            continue
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent holds the matching filenode
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
255 264
256 265
def encodecopies(files, copies):
    """Encode `copies` ({dest: source}) against the sorted `files` list.

    Each entry becomes `<index-of-dest>\\0<source>`; entries are newline
    joined. Every copy destination must appear in `files`.
    """
    entries = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
267 276
268 277
def decodecopies(files, data):
    """Decode copies `data` produced by `encodecopies` back into a dict.

    Returns {dest: source}, or None when the payload does not parse
    (e.g. a same-named extra key written with a different syntax).
    """
    try:
        if not data:
            return {}
        copies = {}
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
284 293
285 294
def encodefileindices(files, subset):
    """Encode `subset` as newline-joined decimal indices into `files`."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in wanted
    )
293 302
294 303
def decodefileindices(files, data):
    """Decode index `data` from `encodefileindices` into a list of files.

    Returns None when the payload does not parse or an index is out of
    range (e.g. a same-named extra key written with a different syntax).
    """
    try:
        if not data:
            return []
        subset = []
        nbfiles = len(files)
        for strindex in data.split(b'\n'):
            idx = int(strindex)
            if not (0 <= idx < nbfiles):
                return None
            subset.append(files[idx])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
310 319
311 320
def encode_copies_sidedata(files):
    """Build the sidedata map for a `ChangingFiles` instance.

    Empty categories are omitted; returns None when nothing needs storing.
    """
    sortedfiles = sorted(files.touched)
    sidedata = {}
    copies_p1 = files.copied_from_p1
    if copies_p1:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(sortedfiles, copies_p1)
    copies_p2 = files.copied_from_p2
    if copies_p2:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(sortedfiles, copies_p2)
    added = files.added
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            sortedfiles, added
        )
    removed = files.removed
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            sortedfiles, removed
        )
    return sidedata or None
334 343
335 344
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata map for revision `rev` of `srcrepo`.

    Returns a dict keyed by the `sidedatamod.SD_*` categories; categories
    with no data are omitted, so the result may be empty.
    """
    ctx = srcrepo[rev]
    # unpack the copies pair right away: testing the 2-tuple itself (as the
    # previous code did) is always truthy, making the guard below dead
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
358 367
359 368
def getsidedataadder(srcrepo, destrepo):
    """Pick the sidedata-adding companion: parallel when workers are enabled.

    Windows is excluded from the worker path (no fork-based workers there).
    """
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if use_w and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
366 375
367 376
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by worker precomputing sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>).

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. The workers need to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    tokens.acquire()
    while True:
        rev = revs_queue.get()
        if rev is None:
            # stop signal received; release the token held for it
            break
        sidedata_queue.put((rev, _getsidedata(srcrepo, rev)))
        tokens.acquire()
    tokens.release()
389 398
390 399
# Per-worker bound on precomputed-but-unconsumed sidedata entries; multiplied
# by the worker count to size the BoundedSemaphore in
# _get_worker_sidedata_adder, keeping memory use in check.
BUFF_PER_WORKER = 50
392 401
393 402
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawn a pool of worker that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of in-flight results (see BUFF_PER_WORKER)
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it make
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        # the attribute name must be a native str: on Python 3, getattr
        # (used by util.safehasattr) rejects bytes attribute names, so the
        # previous b'filteredrevs' form could never match
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued result until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
450 459
451 460
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just compute it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        data = {}
        # only the changelog carries a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            data = _getsidedata(srcrepo, rev)
        return False, (), data

    return sidedatacompanion
464 473
465 474
def getsidedataremover(srcrepo, destrepo):
    """Return a companion that drops copy-related sidedata categories."""

    # categories to strip from changelog entries carrying sidedata
    removable = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        to_drop = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                to_drop = removable
        return False, to_drop, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now