changing-files: add a shorthand property to check for copy relevant info...
marmoute
r46320:232c88dd default
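The changeset below touches two files. In mercurial/metadata.py it adds a ChangingFiles.has_copies_info property; in mercurial/changelog.py it replaces the five-way boolean check that changelog.add() previously open-coded with that shorthand. A minimal standalone sketch of the property's behaviour (an illustrative re-implementation, not the real mercurial.metadata.ChangingFiles class; names mirror the diff below):

    # Illustrative sketch only; mirrors the logic added in the diff below.
    class ChangingFilesSketch(object):
        def __init__(self, removed=(), merged=(), salvaged=(),
                     copied_from_p1=None, copied_from_p2=None):
            self.removed = set(removed)
            self.merged = set(merged)
            self.salvaged = set(salvaged)
            self.copied_from_p1 = dict(copied_from_p1 or {})
            self.copied_from_p2 = dict(copied_from_p2 or {})

        @property
        def has_copies_info(self):
            # True when any copy-tracing relevant data is present; this is
            # the five-way `or` that changelog.add() previously inlined.
            return bool(
                self.removed
                or self.merged
                or self.salvaged
                or self.copied_from_p1
                or self.copied_from_p2
            )

    assert ChangingFilesSketch(copied_from_p1={b'b': b'a'}).has_copies_info
    assert not ChangingFilesSketch().has_copies_info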
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,618 +1,612 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29 from .revlogutils import flagutil
30 30
31 31 _defaultextra = {b'branch': b'default'}
32 32
33 33
34 34 def _string_escape(text):
35 35 """
36 36 >>> from .pycompat import bytechr as chr
37 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 39 >>> s
40 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 41 >>> res = _string_escape(s)
42 42 >>> s == _string_unescape(res)
43 43 True
44 44 """
45 45 # subset of the string_escape codec
46 46 text = (
47 47 text.replace(b'\\', b'\\\\')
48 48 .replace(b'\n', b'\\n')
49 49 .replace(b'\r', b'\\r')
50 50 )
51 51 return text.replace(b'\0', b'\\0')
52 52
53 53
54 54 def _string_unescape(text):
55 55 if b'\\0' in text:
56 56 # fix up \0 without getting into trouble with \\0
57 57 text = text.replace(b'\\\\', b'\\\\\n')
58 58 text = text.replace(b'\\0', b'\0')
59 59 text = text.replace(b'\n', b'')
60 60 return stringutil.unescapestr(text)
61 61
62 62
63 63 def decodeextra(text):
64 64 """
65 65 >>> from .pycompat import bytechr as chr
66 66 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
67 67 ... ).items())
68 68 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
69 69 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
70 70 ... b'baz': chr(92) + chr(0) + b'2'})
71 71 ... ).items())
72 72 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
73 73 """
74 74 extra = _defaultextra.copy()
75 75 for l in text.split(b'\0'):
76 76 if l:
77 77 k, v = _string_unescape(l).split(b':', 1)
78 78 extra[k] = v
79 79 return extra
80 80
81 81
82 82 def encodeextra(d):
83 83 # keys must be sorted to produce a deterministic changelog entry
84 84 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
85 85 return b"\0".join(items)
86 86
87 87
88 88 def stripdesc(desc):
89 89 """strip trailing whitespace and leading and trailing empty lines"""
90 90 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
91 91
92 92
93 93 class appender(object):
94 94 '''the changelog index must be updated last on disk, so we use this class
95 95 to delay writes to it'''
96 96
97 97 def __init__(self, vfs, name, mode, buf):
98 98 self.data = buf
99 99 fp = vfs(name, mode)
100 100 self.fp = fp
101 101 self.offset = fp.tell()
102 102 self.size = vfs.fstat(fp).st_size
103 103 self._end = self.size
104 104
105 105 def end(self):
106 106 return self._end
107 107
108 108 def tell(self):
109 109 return self.offset
110 110
111 111 def flush(self):
112 112 pass
113 113
114 114 @property
115 115 def closed(self):
116 116 return self.fp.closed
117 117
118 118 def close(self):
119 119 self.fp.close()
120 120
121 121 def seek(self, offset, whence=0):
122 122 '''virtual file offset spans real file and data'''
123 123 if whence == 0:
124 124 self.offset = offset
125 125 elif whence == 1:
126 126 self.offset += offset
127 127 elif whence == 2:
128 128 self.offset = self.end() + offset
129 129 if self.offset < self.size:
130 130 self.fp.seek(self.offset)
131 131
132 132 def read(self, count=-1):
133 133 '''only trick here is reads that span real file and data'''
134 134 ret = b""
135 135 if self.offset < self.size:
136 136 s = self.fp.read(count)
137 137 ret = s
138 138 self.offset += len(s)
139 139 if count > 0:
140 140 count -= len(s)
141 141 if count != 0:
142 142 doff = self.offset - self.size
143 143 self.data.insert(0, b"".join(self.data))
144 144 del self.data[1:]
145 145 s = self.data[0][doff : doff + count]
146 146 self.offset += len(s)
147 147 ret += s
148 148 return ret
149 149
150 150 def write(self, s):
151 151 self.data.append(bytes(s))
152 152 self.offset += len(s)
153 153 self._end += len(s)
154 154
155 155 def __enter__(self):
156 156 self.fp.__enter__()
157 157 return self
158 158
159 159 def __exit__(self, *args):
160 160 return self.fp.__exit__(*args)
161 161
162 162
163 163 class _divertopener(object):
164 164 def __init__(self, opener, target):
165 165 self._opener = opener
166 166 self._target = target
167 167
168 168 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
169 169 if name != self._target:
170 170 return self._opener(name, mode, **kwargs)
171 171 return self._opener(name + b".a", mode, **kwargs)
172 172
173 173 def __getattr__(self, attr):
174 174 return getattr(self._opener, attr)
175 175
176 176
177 177 def _delayopener(opener, target, buf):
178 178 """build an opener that stores chunks in 'buf' instead of 'target'"""
179 179
180 180 def _delay(name, mode=b'r', checkambig=False, **kwargs):
181 181 if name != target:
182 182 return opener(name, mode, **kwargs)
183 183 assert not kwargs
184 184 return appender(opener, name, mode, buf)
185 185
186 186 return _delay
187 187
188 188
189 189 @attr.s
190 190 class _changelogrevision(object):
191 191 # Extensions might modify _defaultextra, so let the constructor below pass
192 192 # it in
193 193 extra = attr.ib()
194 194 manifest = attr.ib(default=nullid)
195 195 user = attr.ib(default=b'')
196 196 date = attr.ib(default=(0, 0))
197 197 files = attr.ib(default=attr.Factory(list))
198 198 filesadded = attr.ib(default=None)
199 199 filesremoved = attr.ib(default=None)
200 200 p1copies = attr.ib(default=None)
201 201 p2copies = attr.ib(default=None)
202 202 description = attr.ib(default=b'')
203 203
204 204
205 205 class changelogrevision(object):
206 206 """Holds results of a parsed changelog revision.
207 207
208 208 Changelog revisions consist of multiple pieces of data, including
209 209 the manifest node, user, and date. This object exposes a view into
210 210 the parsed object.
211 211 """
212 212
213 213 __slots__ = (
214 214 '_offsets',
215 215 '_text',
216 216 '_sidedata',
217 217 '_cpsd',
218 218 '_changes',
219 219 )
220 220
221 221 def __new__(cls, text, sidedata, cpsd):
222 222 if not text:
223 223 return _changelogrevision(extra=_defaultextra)
224 224
225 225 self = super(changelogrevision, cls).__new__(cls)
226 226 # We could return here and implement the following as an __init__.
227 227 # But doing it here is equivalent and saves an extra function call.
228 228
229 229 # format used:
230 230 # nodeid\n : manifest node in ascii
231 231 # user\n : user, no \n or \r allowed
232 232 # time tz extra\n : date (time is int or float, timezone is int)
233 233 # : extra is metadata, encoded and separated by '\0'
234 234 # : older versions ignore it
235 235 # files\n\n : files modified by the cset, no \n or \r allowed
236 236 # (.*) : comment (free text, ideally utf-8)
237 237 #
238 238 # changelog v0 doesn't use extra
239 239
240 240 nl1 = text.index(b'\n')
241 241 nl2 = text.index(b'\n', nl1 + 1)
242 242 nl3 = text.index(b'\n', nl2 + 1)
243 243
244 244 # The list of files may be empty, in which case nl3 is the first of the
245 245 # double newline that precedes the description.
246 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 247 doublenl = nl3
248 248 else:
249 249 doublenl = text.index(b'\n\n', nl3 + 1)
250 250
251 251 self._offsets = (nl1, nl2, nl3, doublenl)
252 252 self._text = text
253 253 self._sidedata = sidedata
254 254 self._cpsd = cpsd
255 255 self._changes = None
256 256
257 257 return self
258 258
259 259 @property
260 260 def manifest(self):
261 261 return bin(self._text[0 : self._offsets[0]])
262 262
263 263 @property
264 264 def user(self):
265 265 off = self._offsets
266 266 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
267 267
268 268 @property
269 269 def _rawdate(self):
270 270 off = self._offsets
271 271 dateextra = self._text[off[1] + 1 : off[2]]
272 272 return dateextra.split(b' ', 2)[0:2]
273 273
274 274 @property
275 275 def _rawextra(self):
276 276 off = self._offsets
277 277 dateextra = self._text[off[1] + 1 : off[2]]
278 278 fields = dateextra.split(b' ', 2)
279 279 if len(fields) != 3:
280 280 return None
281 281
282 282 return fields[2]
283 283
284 284 @property
285 285 def date(self):
286 286 raw = self._rawdate
287 287 time = float(raw[0])
288 288 # Various tools did silly things with the timezone.
289 289 try:
290 290 timezone = int(raw[1])
291 291 except ValueError:
292 292 timezone = 0
293 293
294 294 return time, timezone
295 295
296 296 @property
297 297 def extra(self):
298 298 raw = self._rawextra
299 299 if raw is None:
300 300 return _defaultextra
301 301
302 302 return decodeextra(raw)
303 303
304 304 @property
305 305 def changes(self):
306 306 if self._changes is not None:
307 307 return self._changes
308 308 if self._cpsd:
309 309 changes = metadata.decode_files_sidedata(self._sidedata)
310 310 else:
311 311 changes = metadata.ChangingFiles(
312 312 touched=self.files or (),
313 313 added=self.filesadded or (),
314 314 removed=self.filesremoved or (),
315 315 p1_copies=self.p1copies or {},
316 316 p2_copies=self.p2copies or {},
317 317 )
318 318 self._changes = changes
319 319 return changes
320 320
321 321 @property
322 322 def files(self):
323 323 if self._cpsd:
324 324 return sorted(self.changes.touched)
325 325 off = self._offsets
326 326 if off[2] == off[3]:
327 327 return []
328 328
329 329 return self._text[off[2] + 1 : off[3]].split(b'\n')
330 330
331 331 @property
332 332 def filesadded(self):
333 333 if self._cpsd:
334 334 return self.changes.added
335 335 else:
336 336 rawindices = self.extra.get(b'filesadded')
337 337 if rawindices is None:
338 338 return None
339 339 return metadata.decodefileindices(self.files, rawindices)
340 340
341 341 @property
342 342 def filesremoved(self):
343 343 if self._cpsd:
344 344 return self.changes.removed
345 345 else:
346 346 rawindices = self.extra.get(b'filesremoved')
347 347 if rawindices is None:
348 348 return None
349 349 return metadata.decodefileindices(self.files, rawindices)
350 350
351 351 @property
352 352 def p1copies(self):
353 353 if self._cpsd:
354 354 return self.changes.copied_from_p1
355 355 else:
356 356 rawcopies = self.extra.get(b'p1copies')
357 357 if rawcopies is None:
358 358 return None
359 359 return metadata.decodecopies(self.files, rawcopies)
360 360
361 361 @property
362 362 def p2copies(self):
363 363 if self._cpsd:
364 364 return self.changes.copied_from_p2
365 365 else:
366 366 rawcopies = self.extra.get(b'p2copies')
367 367 if rawcopies is None:
368 368 return None
369 369 return metadata.decodecopies(self.files, rawcopies)
370 370
371 371 @property
372 372 def description(self):
373 373 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
374 374
375 375
376 376 class changelog(revlog.revlog):
377 377 def __init__(self, opener, trypending=False):
378 378 """Load a changelog revlog using an opener.
379 379
380 380 If ``trypending`` is true, we attempt to load the index from a
381 381 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
382 382 The ``00changelog.i.a`` file contains index (and possibly inline
383 383 revision) data for a transaction that hasn't been finalized yet.
384 384 It exists in a separate file to facilitate readers (such as
385 385 hooks processes) accessing data before a transaction is finalized.
386 386 """
387 387 if trypending and opener.exists(b'00changelog.i.a'):
388 388 indexfile = b'00changelog.i.a'
389 389 else:
390 390 indexfile = b'00changelog.i'
391 391
392 392 datafile = b'00changelog.d'
393 393 revlog.revlog.__init__(
394 394 self,
395 395 opener,
396 396 indexfile,
397 397 datafile=datafile,
398 398 checkambig=True,
399 399 mmaplargeindex=True,
400 400 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
401 401 )
402 402
403 403 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
404 404 # changelogs don't benefit from generaldelta.
405 405
406 406 self.version &= ~revlog.FLAG_GENERALDELTA
407 407 self._generaldelta = False
408 408
409 409 # Delta chains for changelogs tend to be very small because entries
410 410 # tend to be small and don't delta well with each other. So disable delta
411 411 # chains.
412 412 self._storedeltachains = False
413 413
414 414 self._realopener = opener
415 415 self._delayed = False
416 416 self._delaybuf = None
417 417 self._divert = False
418 418 self._filteredrevs = frozenset()
419 419 self._filteredrevs_hashcache = {}
420 420 self._copiesstorage = opener.options.get(b'copies-storage')
421 421
422 422 @property
423 423 def filteredrevs(self):
424 424 return self._filteredrevs
425 425
426 426 @filteredrevs.setter
427 427 def filteredrevs(self, val):
428 428 # Ensure all updates go through this function
429 429 assert isinstance(val, frozenset)
430 430 self._filteredrevs = val
431 431 self._filteredrevs_hashcache = {}
432 432
433 433 def delayupdate(self, tr):
434 434 """delay visibility of index updates to other readers"""
435 435
436 436 if not self._delayed:
437 437 if len(self) == 0:
438 438 self._divert = True
439 439 if self._realopener.exists(self.indexfile + b'.a'):
440 440 self._realopener.unlink(self.indexfile + b'.a')
441 441 self.opener = _divertopener(self._realopener, self.indexfile)
442 442 else:
443 443 self._delaybuf = []
444 444 self.opener = _delayopener(
445 445 self._realopener, self.indexfile, self._delaybuf
446 446 )
447 447 self._delayed = True
448 448 tr.addpending(b'cl-%i' % id(self), self._writepending)
449 449 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
450 450
451 451 def _finalize(self, tr):
452 452 """finalize index updates"""
453 453 self._delayed = False
454 454 self.opener = self._realopener
455 455 # move redirected index data back into place
456 456 if self._divert:
457 457 assert not self._delaybuf
458 458 tmpname = self.indexfile + b".a"
459 459 nfile = self.opener.open(tmpname)
460 460 nfile.close()
461 461 self.opener.rename(tmpname, self.indexfile, checkambig=True)
462 462 elif self._delaybuf:
463 463 fp = self.opener(self.indexfile, b'a', checkambig=True)
464 464 fp.write(b"".join(self._delaybuf))
465 465 fp.close()
466 466 self._delaybuf = None
467 467 self._divert = False
468 468 # split when we're done
469 469 self._enforceinlinesize(tr)
470 470
471 471 def _writepending(self, tr):
472 472 """create a file containing the unfinalized state for
473 473 pretxnchangegroup"""
474 474 if self._delaybuf:
475 475 # make a temporary copy of the index
476 476 fp1 = self._realopener(self.indexfile)
477 477 pendingfilename = self.indexfile + b".a"
478 478 # register as a temp file to ensure cleanup on failure
479 479 tr.registertmp(pendingfilename)
480 480 # write existing data
481 481 fp2 = self._realopener(pendingfilename, b"w")
482 482 fp2.write(fp1.read())
483 483 # add pending data
484 484 fp2.write(b"".join(self._delaybuf))
485 485 fp2.close()
486 486 # switch modes so finalize can simply rename
487 487 self._delaybuf = None
488 488 self._divert = True
489 489 self.opener = _divertopener(self._realopener, self.indexfile)
490 490
491 491 if self._divert:
492 492 return True
493 493
494 494 return False
495 495
496 496 def _enforceinlinesize(self, tr, fp=None):
497 497 if not self._delayed:
498 498 revlog.revlog._enforceinlinesize(self, tr, fp)
499 499
500 500 def read(self, node):
501 501 """Obtain data from a parsed changelog revision.
502 502
503 503 Returns a 6-tuple of:
504 504
505 505 - manifest node in binary
506 506 - author/user as a localstr
507 507 - date as a 2-tuple of (time, timezone)
508 508 - list of files
509 509 - commit message as a localstr
510 510 - dict of extra metadata
511 511
512 512 Unless you need to access all fields, consider calling
513 513 ``changelogrevision`` instead, as it is faster for partial object
514 514 access.
515 515 """
516 516 d, s = self._revisiondata(node)
517 517 c = changelogrevision(
518 518 d, s, self._copiesstorage == b'changeset-sidedata'
519 519 )
520 520 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
521 521
522 522 def changelogrevision(self, nodeorrev):
523 523 """Obtain a ``changelogrevision`` for a node or revision."""
524 524 text, sidedata = self._revisiondata(nodeorrev)
525 525 return changelogrevision(
526 526 text, sidedata, self._copiesstorage == b'changeset-sidedata'
527 527 )
528 528
529 529 def readfiles(self, node):
530 530 """
531 531 short version of read that only returns the files modified by the cset
532 532 """
533 533 text = self.revision(node)
534 534 if not text:
535 535 return []
536 536 last = text.index(b"\n\n")
537 537 l = text[:last].split(b'\n')
538 538 return l[3:]
539 539
540 540 def add(
541 541 self,
542 542 manifest,
543 543 files,
544 544 desc,
545 545 transaction,
546 546 p1,
547 547 p2,
548 548 user,
549 549 date=None,
550 550 extra=None,
551 551 ):
552 552 # Convert to UTF-8 encoded bytestrings as the very first
553 553 # thing: calling any method on a localstr object will turn it
554 554 # into a str object and the cached UTF-8 string is thus lost.
555 555 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
556 556
557 557 user = user.strip()
558 558 # An empty username or a username with a "\n" will make the
559 559 # revision text contain two "\n\n" sequences -> corrupt
560 560 # repository since read cannot unpack the revision.
561 561 if not user:
562 562 raise error.StorageError(_(b"empty username"))
563 563 if b"\n" in user:
564 564 raise error.StorageError(
565 565 _(b"username %r contains a newline") % pycompat.bytestr(user)
566 566 )
567 567
568 568 desc = stripdesc(desc)
569 569
570 570 if date:
571 571 parseddate = b"%d %d" % dateutil.parsedate(date)
572 572 else:
573 573 parseddate = b"%d %d" % dateutil.makedate()
574 574 if extra:
575 575 branch = extra.get(b"branch")
576 576 if branch in (b"default", b""):
577 577 del extra[b"branch"]
578 578 elif branch in (b".", b"null", b"tip"):
579 579 raise error.StorageError(
580 580 _(b'the name \'%s\' is reserved') % branch
581 581 )
582 582 sortedfiles = sorted(files.touched)
583 583 flags = 0
584 584 sidedata = None
585 585 if self._copiesstorage == b'changeset-sidedata':
586 if (
587 files.removed
588 or files.merged
589 or files.salvaged
590 or files.copied_from_p1
591 or files.copied_from_p2
592 ):
586 if files.has_copies_info:
593 587 flags |= flagutil.REVIDX_HASCOPIESINFO
594 588 sidedata = metadata.encode_files_sidedata(files)
595 589
596 590 if extra:
597 591 extra = encodeextra(extra)
598 592 parseddate = b"%s %s" % (parseddate, extra)
599 593 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
600 594 text = b"\n".join(l)
601 595 return self.addrevision(
602 596 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
603 597 )
604 598
605 599 def branchinfo(self, rev):
606 600 """return the branch name and open/close state of a revision
607 601
608 602 This function exists because creating a changectx object
609 603 just to access this is costly."""
610 604 extra = self.read(rev)[5]
611 605 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
612 606
613 607 def _nodeduplicatecallback(self, transaction, node):
614 608 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
615 609 #
616 610 # We track them in a list to preserve their order from the source bundle
617 611 duplicates = transaction.changes.setdefault(b'revduplicates', [])
618 612 duplicates.append(self.rev(node))
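The "format used" comment in changelogrevision.__new__ above documents the v1 changelog entry layout. A small sketch with hypothetical entry content, showing how the offsets computed there slice the text (the values are made up for illustration):

    # Hypothetical changelog entry, laid out per the "format used" comment.
    text = b"\n".join([
        b"0123456789abcdef0123456789abcdef01234567",  # manifest node (hex)
        b"Jane Doe <jane@example.com>",                # user
        b"0 0",                                        # time tz [extra]
        b"file-a.txt",                                 # touched files, one per line
        b"file-b.txt",
        b"",                                           # blank line before description
        b"the commit message",
    ])

    # Same offset computation as changelogrevision.__new__.
    nl1 = text.index(b'\n')
    nl2 = text.index(b'\n', nl1 + 1)
    nl3 = text.index(b'\n', nl2 + 1)
    doublenl = text.index(b'\n\n', nl3 + 1)

    assert text[nl1 + 1 : nl2] == b"Jane Doe <jane@example.com>"
    assert text[nl3 + 1 : doublenl].split(b'\n') == [b"file-a.txt", b"file-b.txt"]
    assert text[doublenl + 2 :] == b"the commit message"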
--- a/mercurial/metadata.py
+++ b/mercurial/metadata.py
@@ -1,918 +1,928 @@
1 1 # coding: utf8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from . import (
15 15 error,
16 16 node,
17 17 pycompat,
18 18 util,
19 19 )
20 20
21 21 from .revlogutils import (
22 22 flagutil as sidedataflag,
23 23 sidedata as sidedatamod,
24 24 )
25 25
26 26
27 27 class ChangingFiles(object):
28 28 """A class recording the changes made to files by a changeset
29 29
30 30 Actions performed on files are gathered into 5 sets:
31 31
32 32 - added: files actively added in the changeset.
33 33 - merged: files whose history got merged
34 34 - removed: files removed in the revision
35 35 - salvaged: files that might have been deleted by a merge but were not
36 36 - touched: files affected by the merge
37 37
38 38 and copies information is held by 2 mappings
39 39
40 40 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
41 41 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
42 42
43 43 See their inline help for details.
44 44 """
45 45
46 46 def __init__(
47 47 self,
48 48 touched=None,
49 49 added=None,
50 50 removed=None,
51 51 merged=None,
52 52 salvaged=None,
53 53 p1_copies=None,
54 54 p2_copies=None,
55 55 ):
56 56 self._added = set(() if added is None else added)
57 57 self._merged = set(() if merged is None else merged)
58 58 self._removed = set(() if removed is None else removed)
59 59 self._touched = set(() if touched is None else touched)
60 60 self._salvaged = set(() if salvaged is None else salvaged)
61 61 self._touched.update(self._added)
62 62 self._touched.update(self._merged)
63 63 self._touched.update(self._removed)
64 64 self._p1_copies = dict(() if p1_copies is None else p1_copies)
65 65 self._p2_copies = dict(() if p2_copies is None else p2_copies)
66 66
67 67 def __eq__(self, other):
68 68 return (
69 69 self.added == other.added
70 70 and self.merged == other.merged
71 71 and self.removed == other.removed
72 72 and self.salvaged == other.salvaged
73 73 and self.touched == other.touched
74 74 and self.copied_from_p1 == other.copied_from_p1
75 75 and self.copied_from_p2 == other.copied_from_p2
76 76 )
77 77
78 @property
79 def has_copies_info(self):
80 return bool(
81 self.removed
82 or self.merged
83 or self.salvaged
84 or self.copied_from_p1
85 or self.copied_from_p2
86 )
87
78 88 @util.propertycache
79 89 def added(self):
80 90 """files actively added in the changeset
81 91
82 92 Any file present in that revision that was absent in all the changeset's
83 93 parents.
84 94
85 95 In case of merge, this means a file absent in one of the parents but
86 96 existing in the other will *not* be contained in this set. (They were
87 97 added by an ancestor)
88 98 """
89 99 return frozenset(self._added)
90 100
91 101 def mark_added(self, filename):
92 102 if 'added' in vars(self):
93 103 del self.added
94 104 self._added.add(filename)
95 105 self.mark_touched(filename)
96 106
97 107 def update_added(self, filenames):
98 108 for f in filenames:
99 109 self.mark_added(f)
100 110
101 111 @util.propertycache
102 112 def merged(self):
103 113 """files actively merged during a merge
104 114
105 115 Any modified file which had modifications on both sides that needed merging.
106 116
107 117 In this case a new filenode was created and it has two parents.
108 118 """
109 119 return frozenset(self._merged)
110 120
111 121 def mark_merged(self, filename):
112 122 if 'merged' in vars(self):
113 123 del self.merged
114 124 self._merged.add(filename)
115 125 self.mark_touched(filename)
116 126
117 127 def update_merged(self, filenames):
118 128 for f in filenames:
119 129 self.mark_merged(f)
120 130
121 131 @util.propertycache
122 132 def removed(self):
123 133 """files actively removed by the changeset
124 134
125 135 In case of merge this will only contain the set of files removing "new"
126 136 content. For any file absent in the current changeset:
127 137
128 138 a) If the file exists in both parents, it is clearly "actively" removed
129 139 by this changeset.
130 140
131 141 b) If a file exists in only one parent and in none of the common
132 142 ancestors, then the file was newly added in one of the merged branches
133 143 and then got "actively" removed.
134 144
135 145 c) If a file exists in only one parent and at least one of the common
136 146 ancestors using the same filenode, then the file was unchanged on one
137 147 side and deleted on the other side. The merge "passively" propagated
138 148 that deletion, but didn't "actively" remove the file. In this case the
139 149 file is *not* included in the `removed` set.
140 150
141 151 d) If a file exists in only one parent and at least one of the common
142 152 ancestors using a different filenode, then the file was changed on one
143 153 side and removed on the other side. The merge process "actively"
144 154 decided to drop the new change and delete the file. Unlike in the
145 155 previous case (c), the file is included in the `removed` set.
146 156
147 157 Summary table for merge:
148 158
149 159 case | exists in parents | exists in gca || removed
150 160 (a) | both | * || yes
151 161 (b) | one | none || yes
152 162 (c) | one | same filenode || no
153 163 (d) | one | new filenode || yes
154 164 """
155 165 return frozenset(self._removed)
156 166
157 167 def mark_removed(self, filename):
158 168 if 'removed' in vars(self):
159 169 del self.removed
160 170 self._removed.add(filename)
161 171 self.mark_touched(filename)
162 172
163 173 def update_removed(self, filenames):
164 174 for f in filenames:
165 175 self.mark_removed(f)
166 176
167 177 @util.propertycache
168 178 def salvaged(self):
169 179 """files that might have been deleted by a merge, but still exists.
170 180
171 181 During a merge, the manifest merging might select some files for
172 182 removal, or for a removed/changed conflict. If at commit time the file
173 183 still exists, its removal was "reverted" and the file is "salvaged"
174 184 """
175 185 return frozenset(self._salvaged)
176 186
177 187 def mark_salvaged(self, filename):
178 188 if "salvaged" in vars(self):
179 189 del self.salvaged
180 190 self._salvaged.add(filename)
181 191 self.mark_touched(filename)
182 192
183 193 def update_salvaged(self, filenames):
184 194 for f in filenames:
185 195 self.mark_salvaged(f)
186 196
187 197 @util.propertycache
188 198 def touched(self):
189 199 """files either actively modified, added or removed"""
190 200 return frozenset(self._touched)
191 201
192 202 def mark_touched(self, filename):
193 203 if 'touched' in vars(self):
194 204 del self.touched
195 205 self._touched.add(filename)
196 206
197 207 def update_touched(self, filenames):
198 208 for f in filenames:
199 209 self.mark_touched(f)
200 210
201 211 @util.propertycache
202 212 def copied_from_p1(self):
203 213 return self._p1_copies.copy()
204 214
205 215 def mark_copied_from_p1(self, source, dest):
206 216 if 'copied_from_p1' in vars(self):
207 217 del self.copied_from_p1
208 218 self._p1_copies[dest] = source
209 219
210 220 def update_copies_from_p1(self, copies):
211 221 for dest, source in copies.items():
212 222 self.mark_copied_from_p1(source, dest)
213 223
214 224 @util.propertycache
215 225 def copied_from_p2(self):
216 226 return self._p2_copies.copy()
217 227
218 228 def mark_copied_from_p2(self, source, dest):
219 229 if 'copied_from_p2' in vars(self):
220 230 del self.copied_from_p2
221 231 self._p2_copies[dest] = source
222 232
223 233 def update_copies_from_p2(self, copies):
224 234 for dest, source in copies.items():
225 235 self.mark_copied_from_p2(source, dest)
226 236
227 237
228 238 def compute_all_files_changes(ctx):
229 239 """compute the files changed by a revision"""
230 240 p1 = ctx.p1()
231 241 p2 = ctx.p2()
232 242 if p1.rev() == node.nullrev and p2.rev() == node.nullrev:
233 243 return _process_root(ctx)
234 244 elif p1.rev() != node.nullrev and p2.rev() == node.nullrev:
235 245 return _process_linear(p1, ctx)
236 246 elif p1.rev() == node.nullrev and p2.rev() != node.nullrev:
237 247 # In the wild, one can encounter changesets where p1 is null but p2 is not
238 248 return _process_linear(p1, ctx, parent=2)
239 249 elif p1.rev() == p2.rev():
240 250 # In the wild, one can encounter such "non-merge" cases
241 251 return _process_linear(p1, ctx)
242 252 else:
243 253 return _process_merge(p1, p2, ctx)
244 254
245 255
246 256 def _process_root(ctx):
247 257 """compute the appropriate changed files for a changeset with no parents
248 258 """
249 259 # Simple, there was nothing before it, so everything is added.
250 260 md = ChangingFiles()
251 261 manifest = ctx.manifest()
252 262 for filename in manifest:
253 263 md.mark_added(filename)
254 264 return md
255 265
256 266
257 267 def _process_linear(parent_ctx, children_ctx, parent=1):
258 268 """compute the appropriate changed files for a changeset with a single parent
259 269 """
260 270 md = ChangingFiles()
261 271 parent_manifest = parent_ctx.manifest()
262 272 children_manifest = children_ctx.manifest()
263 273
264 274 copies_candidate = []
265 275
266 276 for filename, d in parent_manifest.diff(children_manifest).items():
267 277 if d[1][0] is None:
268 278 # no filenode for the "new" value, file is absent
269 279 md.mark_removed(filename)
270 280 else:
271 281 copies_candidate.append(filename)
272 282 if d[0][0] is None:
273 283 # no filenode for the "old" value: the file was absent
274 284 md.mark_added(filename)
275 285 else:
276 286 # filenode for both "old" and "new"
277 287 md.mark_touched(filename)
278 288
279 289 if parent == 1:
280 290 copied = md.mark_copied_from_p1
281 291 elif parent == 2:
282 292 copied = md.mark_copied_from_p2
283 293 else:
284 294 assert False, "bad parent value %d" % parent
285 295
286 296 for filename in copies_candidate:
287 297 copy_info = children_ctx[filename].renamed()
288 298 if copy_info:
289 299 source, srcnode = copy_info
290 300 copied(source, filename)
291 301
292 302 return md
293 303
294 304
295 305 def _process_merge(p1_ctx, p2_ctx, ctx):
296 306 """compute the appropriate changed files for a changeset with two parents
297 307
298 308 This is a more advanced case. The information we need to record is summarised
299 309 in the following table:
300 310
301 311 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
302 312 │ diff ╲ diff  │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │
303 313 │  p2   ╲  p1  │              │              │              │              │
304 314 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
305 315 │              │              │🄱 No Changes │🄳 No Changes │              │
306 316 │       ø      │🄰 No Changes │      OR      │      OR      │🄵 No Changes │
307 317 │              │              │🄲 Deleted[1] │🄴 Salvaged[2]│     [3]      │
308 318 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
309 319 │              │🄶 No Changes │              │              │              │
310 320 │ (Some, None) │      OR      │🄻 Deleted    │       ø      │       ø      │
311 321 │              │🄷 Deleted[1] │              │              │              │
312 322 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
313 323 │              │🄸 No Changes │              │              │              │
314 324 │ (None, Some) │      OR      │       ø      │🄼 Added      │🄽 Merged     │
315 325 │              │🄹 Salvaged[2]│              │  (copied?)   │  (copied?)   │
316 326 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
317 327 │              │              │              │              │              │
318 328 │ (Some, Some) │🄺 No Changes │       ø      │🄾 Merged     │🄿 Merged     │
319 329 │              │    [3]       │              │  (copied?)   │  (copied?)   │
320 330 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
321 331
322 332 Special case [1]:
323 333
324 334 The situation is:
325 335 - parent-A: file exists,
326 336 - parent-B: no file,
327 337 - working-copy: no file.
328 338
329 339 Detecting a "deletion" will depend on the presence of actual change on
330 340 the "parent-A" branch:
331 341
332 342 Subcase 🄱 or 🄶: if the state of the file in "parent-A" is unchanged
333 343 compared to the merge ancestors, then the parent-A branch left the file
334 344 untouched while parent-B deleted it. We simply apply the change from the
335 345 "parent-B" branch: the file was automatically dropped.
336 346 The result is:
337 347 - file is not recorded as touched by the merge.
338 348
339 349 Subcase 🄲 or 🄷: otherwise, the changes from the parent-A branch were
340 350 explicitly dropped and the file was "deleted again". From a user
341 351 perspective, the message about "locally changed" while "remotely deleted"
342 352 (or the other way around) was issued and the user chose to delete the file.
343 353 The result:
344 354 - file is recorded as touched by the merge.
345 355
346 356
347 357 Special case [2]:
348 358
349 359 The situation is:
350 360 - parent-A: no file,
351 361 - parent-B: file,
352 362 - working-copy: file (same content as parent-B).
353 363
354 364 There are three subcases depending on the ancestors contents:
355 365
356 366 - A) the file is missing in all ancestors,
357 367 - B) at least one ancestor has the file, with a filenode ≠ parent-B's,
358 368 - C) all ancestors use the same filenode as parent-B,
359 369
360 370 Subcase (A) is the simplest: nothing happened on the parent-A side while
361 371 parent-B added the file.
362 372
363 373 The result:
364 374 - the file is not marked as touched by the merge.
365 375
366 376 Subcase (B) is the counterpart of "Special case [1]": the file was
367 377 modified on parent-B side, while parent-A side deleted it. However this
368 378 time, the conflict was solved by keeping the file (and its
369 379 modification). We consider the file as "salvaged".
370 380
371 381 The result:
372 382 - the file is marked as "salvaged" by the merge.
373 383
374 384 Subcase (C) is a subtle variation of the case above. In this case, the
375 385 file is unchanged on the parent-B side and actively removed on the
376 386 parent-A side. So the merge machinery correctly decides it should be
377 387 removed. However, the file was explicitly restored to its parent-B
378 388 content before the merge was committed. The file is marked
379 389 as salvaged too. From the merge result perspective, this is similar to
380 390 Subcase (B); however, from the merge resolution perspective they differ,
381 391 since in (C) there was a conflict with no obvious solution to the
382 392 merge (that got reversed).
383 393
384 394 Special case [3]:
385 395
386 396 The situation is:
387 397 - parent-A: file,
388 398 - parent-B: file (different filenode as parent-A),
389 399 - working-copy: file (same filenode as parent-B).
390 400
391 401 This case is in theory much simpler: for this to happen, the filenode in
392 402 parent-B (the one kept in the working copy) must purely replace the one in
393 403 parent-A (either a descendant, or a fully new file history, see changeset).
394 404 So the merge introduces no changes, and the file is not affected by it...
395 405
396 406 However, in the wild it is possible to find commits where the above is not
397 407 true. For example, repositories have some commits where the *new* node is an
398 408 ancestor of the node in parent-A, or where parent-A and parent-B are two
399 409 branches of the same file history, yet no merge-filenode was created
400 410 (while the "merge" should have led to a "modification").
401 411
402 412 Detecting such cases (and not recording the file as modified) would be a
403 413 nice bonus. However, we do not do any of this yet.
404 414 """
405 415
406 416 md = ChangingFiles()
407 417
408 418 m = ctx.manifest()
409 419 p1m = p1_ctx.manifest()
410 420 p2m = p2_ctx.manifest()
411 421 diff_p1 = p1m.diff(m)
412 422 diff_p2 = p2m.diff(m)
413 423
414 424 cahs = ctx.repo().changelog.commonancestorsheads(
415 425 p1_ctx.node(), p2_ctx.node()
416 426 )
417 427 if not cahs:
418 428 cahs = [node.nullrev]
419 429 mas = [ctx.repo()[r].manifest() for r in cahs]
420 430
421 431 copy_candidates = []
422 432
423 433 # Dealing with case 🄰 happens automatically. Since there is no entry in
424 434 # d1 nor d2, we never iterate over it.
425 435
426 436 # Iteration over d1 content will deal with all cases except those in the
427 437 # first column of the table.
428 438 for filename, d1 in diff_p1.items():
429 439
430 440 d2 = diff_p2.pop(filename, None)
431 441
432 442 if d2 is None:
433 443 # this deals with the first line of the table.
434 444 _process_other_unchanged(md, mas, filename, d1)
435 445 else:
436 446
437 447 if d1[0][0] is None and d2[0][0] is None:
438 448 # case 🄼: the file is absent from both parents, it was added.
439 449 md.mark_added(filename)
440 450 copy_candidates.append(filename)
441 451 elif d1[1][0] is None and d2[1][0] is None:
442 452 # case 🄻: both deleted the file.
443 453 md.mark_removed(filename)
444 454 elif d1[1][0] is not None and d2[1][0] is not None:
445 455 # cases 🄽, 🄾 and 🄿
446 456 md.mark_merged(filename)
447 457 copy_candidates.append(filename)
448 458 else:
449 459 # Impossible case: the post-merge file status cannot be None on
450 460 # one side and something on the other side.
451 461 assert False, "unreachable"
452 462
453 463 # Iteration over the remaining d2 content deals with the first column of
454 464 # the table.
455 465 for filename, d2 in diff_p2.items():
456 466 _process_other_unchanged(md, mas, filename, d2)
457 467
458 468 for filename in copy_candidates:
459 469 copy_info = ctx[filename].renamed()
460 470 if copy_info:
461 471 source, srcnode = copy_info
462 472 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
463 473 md.mark_copied_from_p1(source, filename)
464 474 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
465 475 md.mark_copied_from_p2(source, filename)
466 476 return md
467 477
468 478
469 479 def _find(manifest, filename):
470 480 """return the associate filenode or None"""
471 481 if filename not in manifest:
472 482 return None
473 483 return manifest.find(filename)[0]
474 484
475 485
476 486 def _process_other_unchanged(md, mas, filename, diff):
477 487 source_node = diff[0][0]
478 488 target_node = diff[1][0]
479 489
480 490 if source_node is not None and target_node is None:
481 491 if any(not _find(ma, filename) == source_node for ma in mas):
482 492 # case 🄲 or 🄷
483 493 md.mark_removed(filename)
484 494 # else, we have case 🄱 or 🄶: no change needs to be recorded
485 495 elif source_node is None and target_node is not None:
486 496 if any(_find(ma, filename) is not None for ma in mas):
487 497 # case 🄴 or 🄹
488 498 md.mark_salvaged(filename)
489 499 # else, we have case 🄳 or 🄸: simple merge without intervention
490 500 elif source_node is not None and target_node is not None:
491 501 # case 🄵 or 🄺: simple merge without intervention
492 502 #
493 503 # In the buggy case where source_node is not an ancestor of target_node,
494 504 # a new filenode should have been created, recording this as
495 505 # "modified". We do not deal with that yet.
496 506 pass
497 507 else:
498 508 # An impossible case: the diff algorithm should not return an entry if
499 509 # the file is missing on both sides.
500 510 assert False, "unreachable"
501 511
502 512
503 513 def _missing_from_all_ancestors(mas, filename):
504 514 return all(_find(ma, filename) is None for ma in mas)
505 515
506 516
507 517 def computechangesetfilesadded(ctx):
508 518 """return the list of files added in a changeset
509 519 """
510 520 added = []
511 521 for f in ctx.files():
512 522 if not any(f in p for p in ctx.parents()):
513 523 added.append(f)
514 524 return added
515 525
516 526
517 527 def get_removal_filter(ctx, x=None):
518 528 """return a function to detect files "wrongly" detected as `removed`
519 529
520 530 When a file is removed relative to p1 in a merge, this
521 531 function determines whether the absence is due to a
522 532 deletion from a parent, or whether the merge commit
523 533 itself deletes the file. We decide this by doing a
524 534 simplified three way merge of the manifest entry for
525 535 the file. There are two ways we decide the merge
526 536 itself didn't delete a file:
527 537 - neither parent (nor the merge) contain the file
528 538 - exactly one parent contains the file, and that
529 539 parent has the same filelog entry as the merge
530 540 ancestor (or all of them if there are two). In other
531 541 words, that parent left the file unchanged while the
532 542 other one deleted it.
533 543 One way to think about this is that deleting a file is
534 544 similar to emptying it, so the list of changed files
535 545 should be similar either way. The computation
536 546 described above is not done directly in _filecommit
537 547 when creating the list of changed files, however
538 548 it does something very similar by comparing filelog
539 549 nodes.
540 550 """
541 551
542 552 if x is not None:
543 553 p1, p2, m1, m2 = x
544 554 else:
545 555 p1 = ctx.p1()
546 556 p2 = ctx.p2()
547 557 m1 = p1.manifest()
548 558 m2 = p2.manifest()
549 559
550 560 @util.cachefunc
551 561 def mas():
552 562 p1n = p1.node()
553 563 p2n = p2.node()
554 564 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
555 565 if not cahs:
556 566 cahs = [node.nullrev]
557 567 return [ctx.repo()[r].manifest() for r in cahs]
558 568
559 569 def deletionfromparent(f):
560 570 if f in m1:
561 571 return f not in m2 and all(
562 572 f in ma and ma.find(f) == m1.find(f) for ma in mas()
563 573 )
564 574 elif f in m2:
565 575 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
566 576 else:
567 577 return True
568 578
569 579 return deletionfromparent
570 580
571 581
572 582 def computechangesetfilesremoved(ctx):
573 583 """return the list of files removed in a changeset
574 584 """
575 585 removed = []
576 586 for f in ctx.files():
577 587 if f not in ctx:
578 588 removed.append(f)
579 589 if removed:
580 590 rf = get_removal_filter(ctx)
581 591 removed = [r for r in removed if not rf(r)]
582 592 return removed
583 593
584 594
585 595 def computechangesetfilesmerged(ctx):
586 596 """return the list of files merged in a changeset
587 597 """
588 598 merged = []
589 599 if len(ctx.parents()) < 2:
590 600 return merged
591 601 for f in ctx.files():
592 602 if f in ctx:
593 603 fctx = ctx[f]
594 604 parents = fctx._filelog.parents(fctx._filenode)
595 605 if parents[1] != node.nullid:
596 606 merged.append(f)
597 607 return merged
598 608
599 609
600 610 def computechangesetcopies(ctx):
601 611 """return the copies data for a changeset
602 612
603 613 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
604 614
605 615 Each dictionary is of the form: `{newname: oldname}`
606 616 """
607 617 p1copies = {}
608 618 p2copies = {}
609 619 p1 = ctx.p1()
610 620 p2 = ctx.p2()
611 621 narrowmatch = ctx._repo.narrowmatch()
612 622 for dst in ctx.files():
613 623 if not narrowmatch(dst) or dst not in ctx:
614 624 continue
615 625 copied = ctx[dst].renamed()
616 626 if not copied:
617 627 continue
618 628 src, srcnode = copied
619 629 if src in p1 and p1[src].filenode() == srcnode:
620 630 p1copies[dst] = src
621 631 elif src in p2 and p2[src].filenode() == srcnode:
622 632 p2copies[dst] = src
623 633 return p1copies, p2copies
624 634
625 635
626 636 def encodecopies(files, copies):
627 637 items = []
628 638 for i, dst in enumerate(files):
629 639 if dst in copies:
630 640 items.append(b'%d\0%s' % (i, copies[dst]))
631 641 if len(items) != len(copies):
632 642 raise error.ProgrammingError(
633 643 b'some copy targets missing from file list'
634 644 )
635 645 return b"\n".join(items)
636 646
637 647
638 648 def decodecopies(files, data):
639 649 try:
640 650 copies = {}
641 651 if not data:
642 652 return copies
643 653 for l in data.split(b'\n'):
644 654 strindex, src = l.split(b'\0')
645 655 i = int(strindex)
646 656 dst = files[i]
647 657 copies[dst] = src
648 658 return copies
649 659 except (ValueError, IndexError):
650 660 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
651 661 # used different syntax for the value.
652 662 return None
653 663
654 664
655 665 def encodefileindices(files, subset):
656 666 subset = set(subset)
657 667 indices = []
658 668 for i, f in enumerate(files):
659 669 if f in subset:
660 670 indices.append(b'%d' % i)
661 671 return b'\n'.join(indices)
662 672
663 673
664 674 def decodefileindices(files, data):
665 675 try:
666 676 subset = []
667 677 if not data:
668 678 return subset
669 679 for strindex in data.split(b'\n'):
670 680 i = int(strindex)
671 681 if i < 0 or i >= len(files):
672 682 return None
673 683 subset.append(files[i])
674 684 return subset
675 685 except (ValueError, IndexError):
676 686 # Perhaps someone had chosen the same key name (e.g. "added") and
677 687 # used different syntax for the value.
678 688 return None
679 689
680 690
681 691 # see mercurial/helptext/internals/revlogs.txt for details about the format
682 692
683 693 ACTION_MASK = int("111" "00", 2)
684 694 # note: an untouched file used as a copy source will appear as `000` for this mask.
685 695 ADDED_FLAG = int("001" "00", 2)
686 696 MERGED_FLAG = int("010" "00", 2)
687 697 REMOVED_FLAG = int("011" "00", 2)
688 698 SALVAGED_FLAG = int("100" "00", 2)
689 699 TOUCHED_FLAG = int("101" "00", 2)
690 700
691 701 COPIED_MASK = int("11", 2)
692 702 COPIED_FROM_P1_FLAG = int("10", 2)
693 703 COPIED_FROM_P2_FLAG = int("11", 2)
694 704
695 705 # structure is <flag><filename-end><copy-source>
696 706 INDEX_HEADER = struct.Struct(">L")
697 707 INDEX_ENTRY = struct.Struct(">bLL")
698 708
699 709
700 710 def encode_files_sidedata(files):
701 711 all_files = set(files.touched)
702 712 all_files.update(files.copied_from_p1.values())
703 713 all_files.update(files.copied_from_p2.values())
704 714 all_files = sorted(all_files)
705 715 file_idx = {f: i for (i, f) in enumerate(all_files)}
706 716 file_idx[None] = 0
707 717
708 718 chunks = [INDEX_HEADER.pack(len(all_files))]
709 719
710 720 filename_length = 0
711 721 for f in all_files:
712 722 filename_size = len(f)
713 723 filename_length += filename_size
714 724 flag = 0
715 725 if f in files.added:
716 726 flag |= ADDED_FLAG
717 727 elif f in files.merged:
718 728 flag |= MERGED_FLAG
719 729 elif f in files.removed:
720 730 flag |= REMOVED_FLAG
721 731 elif f in files.salvaged:
722 732 flag |= SALVAGED_FLAG
723 733 elif f in files.touched:
724 734 flag |= TOUCHED_FLAG
725 735
726 736 copy = None
727 737 if f in files.copied_from_p1:
728 738 flag |= COPIED_FROM_P1_FLAG
729 739 copy = files.copied_from_p1.get(f)
730 740 elif f in files.copied_from_p2:
731 741 copy = files.copied_from_p2.get(f)
732 742 flag |= COPIED_FROM_P2_FLAG
733 743 copy_idx = file_idx[copy]
734 744 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
735 745 chunks.extend(all_files)
736 746 return {sidedatamod.SD_FILES: b''.join(chunks)}
737 747
738 748
739 749 def decode_files_sidedata(sidedata):
740 750 md = ChangingFiles()
741 751 raw = sidedata.get(sidedatamod.SD_FILES)
742 752
743 753 if raw is None:
744 754 return md
745 755
746 756 copies = []
747 757 all_files = []
748 758
749 759 assert len(raw) >= INDEX_HEADER.size
750 760 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
751 761
752 762 offset = INDEX_HEADER.size
753 763 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
754 764 file_offset_last = file_offset_base
755 765
756 766 assert len(raw) >= file_offset_base
757 767
758 768 for idx in range(total_files):
759 769 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
760 770 file_end += file_offset_base
761 771 filename = raw[file_offset_last:file_end]
762 772 filesize = file_end - file_offset_last
763 773 assert len(filename) == filesize
764 774 offset += INDEX_ENTRY.size
765 775 file_offset_last = file_end
766 776 all_files.append(filename)
767 777 if flag & ACTION_MASK == ADDED_FLAG:
768 778 md.mark_added(filename)
769 779 elif flag & ACTION_MASK == MERGED_FLAG:
770 780 md.mark_merged(filename)
771 781 elif flag & ACTION_MASK == REMOVED_FLAG:
772 782 md.mark_removed(filename)
773 783 elif flag & ACTION_MASK == SALVAGED_FLAG:
774 784 md.mark_salvaged(filename)
775 785 elif flag & ACTION_MASK == TOUCHED_FLAG:
776 786 md.mark_touched(filename)
777 787
778 788 copied = None
779 789 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
780 790 copied = md.mark_copied_from_p1
781 791 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
782 792 copied = md.mark_copied_from_p2
783 793
784 794 if copied is not None:
785 795 copies.append((copied, filename, copy_idx))
786 796
787 797 for copied, filename, copy_idx in copies:
788 798 copied(all_files[copy_idx], filename)
789 799
790 800 return md
791 801
792 802
793 803 def _getsidedata(srcrepo, rev):
794 804 ctx = srcrepo[rev]
795 805 files = compute_all_files_changes(ctx)
796 806 return encode_files_sidedata(files)
797 807
798 808
799 809 def getsidedataadder(srcrepo, destrepo):
800 810 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
801 811 if pycompat.iswindows or not use_w:
802 812 return _get_simple_sidedata_adder(srcrepo, destrepo)
803 813 else:
804 814 return _get_worker_sidedata_adder(srcrepo, destrepo)
805 815
806 816
807 817 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
808 818 """The function used by worker precomputing sidedata
809 819
810 820 It read an input queue containing revision numbers
811 821 It write in an output queue containing (rev, <sidedata-map>)
812 822
813 823 The `None` input value is used as a stop signal.
814 824
815 825 The `tokens` semaphore is used to avoid having too many unprocessed
816 826 entries. The workers need to acquire one token before fetching a task.
817 827 Tokens are released by the consumer of the produced data.
818 828 """
819 829 tokens.acquire()
820 830 rev = revs_queue.get()
821 831 while rev is not None:
822 832 data = _getsidedata(srcrepo, rev)
823 833 sidedata_queue.put((rev, data))
824 834 tokens.acquire()
825 835 rev = revs_queue.get()
826 836 # processing of `None` is completed, release the token.
827 837 tokens.release()
828 838
829 839
830 840 BUFF_PER_WORKER = 50
831 841
832 842
833 843 def _get_worker_sidedata_adder(srcrepo, destrepo):
834 844 """The parallel version of the sidedata computation
835 845
836 846 This code spawns a pool of workers that precompute a buffer of sidedata
837 847 before we actually need them"""
838 848 # avoid circular import copies -> scmutil -> worker -> copies
839 849 from . import worker
840 850
841 851 nbworkers = worker._numworkers(srcrepo.ui)
842 852
843 853 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
844 854 revsq = multiprocessing.Queue()
845 855 sidedataq = multiprocessing.Queue()
846 856
847 857 assert srcrepo.filtername is None
848 858 # queue all tasks beforehand; revision numbers are small and it makes
849 859 # synchronisation simpler
850 860 #
851 861 # Since the computation for each node can be quite expensive, the overhead
852 862 # of using a single queue is not relevant. In practice, most computations
853 863 # are fast but some are very expensive and dominate all the other smaller
854 864 # costs.
855 865 for r in srcrepo.changelog.revs():
856 866 revsq.put(r)
857 867 # queue the "no more tasks" markers
858 868 for i in range(nbworkers):
859 869 revsq.put(None)
860 870
861 871 allworkers = []
862 872 for i in range(nbworkers):
863 873 args = (srcrepo, revsq, sidedataq, tokens)
864 874 w = multiprocessing.Process(target=_sidedata_worker, args=args)
865 875 allworkers.append(w)
866 876 w.start()
867 877
868 878 # dictionary to store results for revisions higher than the one we are
869 879 # looking for. For example, if we need the sidedata map for 42 and 43 is
870 880 # received, we shelve 43 for later use.
871 881 staging = {}
872 882
873 883 def sidedata_companion(revlog, rev):
874 884 sidedata = {}
875 885 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
876 886 # Was the data previously shelved?
877 887 sidedata = staging.pop(rev, None)
878 888 if sidedata is None:
879 889 # look at the queued results until we find the one we are looking
880 890 # for (shelve the other ones)
881 891 r, sidedata = sidedataq.get()
882 892 while r != rev:
883 893 staging[r] = sidedata
884 894 r, sidedata = sidedataq.get()
885 895 tokens.release()
886 896 return False, (), sidedata
887 897
888 898 return sidedata_companion
889 899
890 900
891 901 def _get_simple_sidedata_adder(srcrepo, destrepo):
892 902 """The simple version of the sidedata computation
893 903
894 904 It just computes it in the same thread, on request"""
895 905
896 906 def sidedatacompanion(revlog, rev):
897 907 sidedata = {}
898 908 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
899 909 sidedata = _getsidedata(srcrepo, rev)
900 910 return False, (), sidedata
901 911
902 912 return sidedatacompanion
903 913
904 914
905 915 def getsidedataremover(srcrepo, destrepo):
906 916 def sidedatacompanion(revlog, rev):
907 917 f = ()
908 918 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
909 919 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
910 920 f = (
911 921 sidedatamod.SD_P1COPIES,
912 922 sidedatamod.SD_P2COPIES,
913 923 sidedatamod.SD_FILESADDED,
914 924 sidedatamod.SD_FILESREMOVED,
915 925 )
916 926 return False, f, {}
917 927
918 928 return sidedatacompanion
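Putting the metadata.py pieces together: encode_files_sidedata() and decode_files_sidedata() round-trip a ChangingFiles instance through the packed sidedata blob, and the new has_copies_info shorthand reports whether the revision carries copy-relevant data. A sketch, assuming a Mercurial source tree containing this revision is importable; the file names are made up:

    # Round-trip sketch; assumes `mercurial` at this revision is on sys.path.
    from mercurial import metadata

    files = metadata.ChangingFiles(
        touched=[b'modified.txt'],
        added=[b'new.txt'],
        removed=[b'gone.txt'],
        p1_copies={b'new.txt': b'old.txt'},  # new.txt was copied from old.txt
    )
    assert files.has_copies_info  # the shorthand introduced by this changeset

    blob = metadata.encode_files_sidedata(files)  # {SD_FILES: <packed bytes>}
    decoded = metadata.decode_files_sidedata(blob)
    assert decoded == files
    assert decoded.copied_from_p1 == {b'new.txt': b'old.txt'}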