changing-files: drop the now useless changelogrevision argument...
marmoute
r46212:9003e652 default
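For context before the diff: `metadata.decode_files_sidedata` took a `changelogrevision` argument that it never used. This changeset removes the parameter from the function signature in `mercurial/metadata.py` and updates the call site shown in the first hunk, `changelogrevision.changes` in `mercurial/changelog.py`. The call-site change in isolation:

```python
# Before: the changelogrevision object was passed along but never used.
changes = metadata.decode_files_sidedata(self, self._sidedata)

# After: only the sidedata mapping is needed.
changes = metadata.decode_files_sidedata(self._sidedata)
```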
@@ -1,606 +1,606 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 _defaultextra = {b'branch': b'default'}
31 31
32 32
33 33 def _string_escape(text):
34 34 """
35 35 >>> from .pycompat import bytechr as chr
36 36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
38 38 >>> s
39 39 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
40 40 >>> res = _string_escape(s)
41 41 >>> s == _string_unescape(res)
42 42 True
43 43 """
44 44 # subset of the string_escape codec
45 45 text = (
46 46 text.replace(b'\\', b'\\\\')
47 47 .replace(b'\n', b'\\n')
48 48 .replace(b'\r', b'\\r')
49 49 )
50 50 return text.replace(b'\0', b'\\0')
51 51
52 52
53 53 def _string_unescape(text):
54 54 if b'\\0' in text:
55 55 # fix up \0 without getting into trouble with \\0
56 56 text = text.replace(b'\\\\', b'\\\\\n')
57 57 text = text.replace(b'\\0', b'\0')
58 58 text = text.replace(b'\n', b'')
59 59 return stringutil.unescapestr(text)
60 60
61 61
62 62 def decodeextra(text):
63 63 """
64 64 >>> from .pycompat import bytechr as chr
65 65 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
66 66 ... ).items())
67 67 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
69 69 ... b'baz': chr(92) + chr(0) + b'2'})
70 70 ... ).items())
71 71 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
72 72 """
73 73 extra = _defaultextra.copy()
74 74 for l in text.split(b'\0'):
75 75 if l:
76 76 k, v = _string_unescape(l).split(b':', 1)
77 77 extra[k] = v
78 78 return extra
79 79
80 80
81 81 def encodeextra(d):
82 82 # keys must be sorted to produce a deterministic changelog entry
83 83 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
84 84 return b"\0".join(items)
85 85
86 86
87 87 def stripdesc(desc):
88 88 """strip trailing whitespace and leading and trailing empty lines"""
89 89 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
90 90
91 91
92 92 class appender(object):
93 93 '''the changelog index must be updated last on disk, so we use this class
94 94 to delay writes to it'''
95 95
96 96 def __init__(self, vfs, name, mode, buf):
97 97 self.data = buf
98 98 fp = vfs(name, mode)
99 99 self.fp = fp
100 100 self.offset = fp.tell()
101 101 self.size = vfs.fstat(fp).st_size
102 102 self._end = self.size
103 103
104 104 def end(self):
105 105 return self._end
106 106
107 107 def tell(self):
108 108 return self.offset
109 109
110 110 def flush(self):
111 111 pass
112 112
113 113 @property
114 114 def closed(self):
115 115 return self.fp.closed
116 116
117 117 def close(self):
118 118 self.fp.close()
119 119
120 120 def seek(self, offset, whence=0):
121 121 '''virtual file offset spans real file and data'''
122 122 if whence == 0:
123 123 self.offset = offset
124 124 elif whence == 1:
125 125 self.offset += offset
126 126 elif whence == 2:
127 127 self.offset = self.end() + offset
128 128 if self.offset < self.size:
129 129 self.fp.seek(self.offset)
130 130
131 131 def read(self, count=-1):
132 132 '''only trick here is reads that span real file and data'''
133 133 ret = b""
134 134 if self.offset < self.size:
135 135 s = self.fp.read(count)
136 136 ret = s
137 137 self.offset += len(s)
138 138 if count > 0:
139 139 count -= len(s)
140 140 if count != 0:
141 141 doff = self.offset - self.size
142 142 self.data.insert(0, b"".join(self.data))
143 143 del self.data[1:]
144 144 s = self.data[0][doff : doff + count]
145 145 self.offset += len(s)
146 146 ret += s
147 147 return ret
148 148
149 149 def write(self, s):
150 150 self.data.append(bytes(s))
151 151 self.offset += len(s)
152 152 self._end += len(s)
153 153
154 154 def __enter__(self):
155 155 self.fp.__enter__()
156 156 return self
157 157
158 158 def __exit__(self, *args):
159 159 return self.fp.__exit__(*args)
160 160
161 161
162 162 class _divertopener(object):
163 163 def __init__(self, opener, target):
164 164 self._opener = opener
165 165 self._target = target
166 166
167 167 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
168 168 if name != self._target:
169 169 return self._opener(name, mode, **kwargs)
170 170 return self._opener(name + b".a", mode, **kwargs)
171 171
172 172 def __getattr__(self, attr):
173 173 return getattr(self._opener, attr)
174 174
175 175
176 176 def _delayopener(opener, target, buf):
177 177 """build an opener that stores chunks in 'buf' instead of 'target'"""
178 178
179 179 def _delay(name, mode=b'r', checkambig=False, **kwargs):
180 180 if name != target:
181 181 return opener(name, mode, **kwargs)
182 182 assert not kwargs
183 183 return appender(opener, name, mode, buf)
184 184
185 185 return _delay
186 186
187 187
188 188 @attr.s
189 189 class _changelogrevision(object):
190 190 # Extensions might modify _defaultextra, so let the constructor below pass
191 191 # it in
192 192 extra = attr.ib()
193 193 manifest = attr.ib(default=nullid)
194 194 user = attr.ib(default=b'')
195 195 date = attr.ib(default=(0, 0))
196 196 files = attr.ib(default=attr.Factory(list))
197 197 filesadded = attr.ib(default=None)
198 198 filesremoved = attr.ib(default=None)
199 199 p1copies = attr.ib(default=None)
200 200 p2copies = attr.ib(default=None)
201 201 description = attr.ib(default=b'')
202 202
203 203
204 204 class changelogrevision(object):
205 205 """Holds results of a parsed changelog revision.
206 206
207 207 Changelog revisions consist of multiple pieces of data, including
208 208 the manifest node, user, and date. This object exposes a view into
209 209 the parsed object.
210 210 """
211 211
212 212 __slots__ = (
213 213 '_offsets',
214 214 '_text',
215 215 '_sidedata',
216 216 '_cpsd',
217 217 '_changes',
218 218 )
219 219
220 220 def __new__(cls, text, sidedata, cpsd):
221 221 if not text:
222 222 return _changelogrevision(extra=_defaultextra)
223 223
224 224 self = super(changelogrevision, cls).__new__(cls)
225 225 # We could return here and implement the following as an __init__.
226 226 # But doing it here is equivalent and saves an extra function call.
227 227
228 228 # format used:
229 229 # nodeid\n : manifest node in ascii
230 230 # user\n : user, no \n or \r allowed
231 231 # time tz extra\n : date (time is int or float, timezone is int)
232 232 # : extra is metadata, encoded and separated by '\0'
233 233 # : older versions ignore it
234 234 # files\n\n : files modified by the cset, no \n or \r allowed
235 235 # (.*) : comment (free text, ideally utf-8)
236 236 #
237 237 # changelog v0 doesn't use extra
238 238
239 239 nl1 = text.index(b'\n')
240 240 nl2 = text.index(b'\n', nl1 + 1)
241 241 nl3 = text.index(b'\n', nl2 + 1)
242 242
243 243         # The list of files may be empty, in which case nl3 is the first of
244 244         # the double newline that precedes the description.
245 245 if text[nl3 + 1 : nl3 + 2] == b'\n':
246 246 doublenl = nl3
247 247 else:
248 248 doublenl = text.index(b'\n\n', nl3 + 1)
249 249
250 250 self._offsets = (nl1, nl2, nl3, doublenl)
251 251 self._text = text
252 252 self._sidedata = sidedata
253 253 self._cpsd = cpsd
254 254 self._changes = None
255 255
256 256 return self
257 257
258 258 @property
259 259 def manifest(self):
260 260 return bin(self._text[0 : self._offsets[0]])
261 261
262 262 @property
263 263 def user(self):
264 264 off = self._offsets
265 265 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
266 266
267 267 @property
268 268 def _rawdate(self):
269 269 off = self._offsets
270 270 dateextra = self._text[off[1] + 1 : off[2]]
271 271 return dateextra.split(b' ', 2)[0:2]
272 272
273 273 @property
274 274 def _rawextra(self):
275 275 off = self._offsets
276 276 dateextra = self._text[off[1] + 1 : off[2]]
277 277 fields = dateextra.split(b' ', 2)
278 278 if len(fields) != 3:
279 279 return None
280 280
281 281 return fields[2]
282 282
283 283 @property
284 284 def date(self):
285 285 raw = self._rawdate
286 286 time = float(raw[0])
287 287 # Various tools did silly things with the timezone.
288 288 try:
289 289 timezone = int(raw[1])
290 290 except ValueError:
291 291 timezone = 0
292 292
293 293 return time, timezone
294 294
295 295 @property
296 296 def extra(self):
297 297 raw = self._rawextra
298 298 if raw is None:
299 299 return _defaultextra
300 300
301 301 return decodeextra(raw)
302 302
303 303 @property
304 304 def changes(self):
305 305 if self._changes is not None:
306 306 return self._changes
307 307 if self._cpsd:
308 changes = metadata.decode_files_sidedata(self, self._sidedata)
308 changes = metadata.decode_files_sidedata(self._sidedata)
309 309 else:
310 310 changes = metadata.ChangingFiles(
311 311 touched=self.files or (),
312 312 added=self.filesadded or (),
313 313 removed=self.filesremoved or (),
314 314 p1_copies=self.p1copies or {},
315 315 p2_copies=self.p2copies or {},
316 316 )
317 317 self._changes = changes
318 318 return changes
319 319
320 320 @property
321 321 def files(self):
322 322 off = self._offsets
323 323 if off[2] == off[3]:
324 324 return []
325 325
326 326 return self._text[off[2] + 1 : off[3]].split(b'\n')
327 327
328 328 @property
329 329 def filesadded(self):
330 330 if self._cpsd:
331 331 return self.changes.added
332 332 else:
333 333 rawindices = self.extra.get(b'filesadded')
334 334 if rawindices is None:
335 335 return None
336 336 return metadata.decodefileindices(self.files, rawindices)
337 337
338 338 @property
339 339 def filesremoved(self):
340 340 if self._cpsd:
341 341 return self.changes.removed
342 342 else:
343 343 rawindices = self.extra.get(b'filesremoved')
344 344 if rawindices is None:
345 345 return None
346 346 return metadata.decodefileindices(self.files, rawindices)
347 347
348 348 @property
349 349 def p1copies(self):
350 350 if self._cpsd:
351 351 return self.changes.copied_from_p1
352 352 else:
353 353 rawcopies = self.extra.get(b'p1copies')
354 354 if rawcopies is None:
355 355 return None
356 356 return metadata.decodecopies(self.files, rawcopies)
357 357
358 358 @property
359 359 def p2copies(self):
360 360 if self._cpsd:
361 361 return self.changes.copied_from_p2
362 362 else:
363 363 rawcopies = self.extra.get(b'p2copies')
364 364 if rawcopies is None:
365 365 return None
366 366 return metadata.decodecopies(self.files, rawcopies)
367 367
368 368 @property
369 369 def description(self):
370 370 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
371 371
372 372
373 373 class changelog(revlog.revlog):
374 374 def __init__(self, opener, trypending=False):
375 375 """Load a changelog revlog using an opener.
376 376
377 377 If ``trypending`` is true, we attempt to load the index from a
378 378 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
379 379 The ``00changelog.i.a`` file contains index (and possibly inline
380 380 revision) data for a transaction that hasn't been finalized yet.
381 381 It exists in a separate file to facilitate readers (such as
382 382         hook processes) accessing data before a transaction is finalized.
383 383 """
384 384 if trypending and opener.exists(b'00changelog.i.a'):
385 385 indexfile = b'00changelog.i.a'
386 386 else:
387 387 indexfile = b'00changelog.i'
388 388
389 389 datafile = b'00changelog.d'
390 390 revlog.revlog.__init__(
391 391 self,
392 392 opener,
393 393 indexfile,
394 394 datafile=datafile,
395 395 checkambig=True,
396 396 mmaplargeindex=True,
397 397 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
398 398 )
399 399
400 400 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
401 401 # changelogs don't benefit from generaldelta.
402 402
403 403 self.version &= ~revlog.FLAG_GENERALDELTA
404 404 self._generaldelta = False
405 405
406 406 # Delta chains for changelogs tend to be very small because entries
407 407             # tend to be small and don't delta well with each other. So disable delta
408 408 # chains.
409 409 self._storedeltachains = False
410 410
411 411 self._realopener = opener
412 412 self._delayed = False
413 413 self._delaybuf = None
414 414 self._divert = False
415 415 self._filteredrevs = frozenset()
416 416 self._filteredrevs_hashcache = {}
417 417 self._copiesstorage = opener.options.get(b'copies-storage')
418 418
419 419 @property
420 420 def filteredrevs(self):
421 421 return self._filteredrevs
422 422
423 423 @filteredrevs.setter
424 424 def filteredrevs(self, val):
425 425 # Ensure all updates go through this function
426 426 assert isinstance(val, frozenset)
427 427 self._filteredrevs = val
428 428 self._filteredrevs_hashcache = {}
429 429
430 430 def delayupdate(self, tr):
431 431 """delay visibility of index updates to other readers"""
432 432
433 433 if not self._delayed:
434 434 if len(self) == 0:
435 435 self._divert = True
436 436 if self._realopener.exists(self.indexfile + b'.a'):
437 437 self._realopener.unlink(self.indexfile + b'.a')
438 438 self.opener = _divertopener(self._realopener, self.indexfile)
439 439 else:
440 440 self._delaybuf = []
441 441 self.opener = _delayopener(
442 442 self._realopener, self.indexfile, self._delaybuf
443 443 )
444 444 self._delayed = True
445 445 tr.addpending(b'cl-%i' % id(self), self._writepending)
446 446 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
447 447
448 448 def _finalize(self, tr):
449 449 """finalize index updates"""
450 450 self._delayed = False
451 451 self.opener = self._realopener
452 452 # move redirected index data back into place
453 453 if self._divert:
454 454 assert not self._delaybuf
455 455 tmpname = self.indexfile + b".a"
456 456 nfile = self.opener.open(tmpname)
457 457 nfile.close()
458 458 self.opener.rename(tmpname, self.indexfile, checkambig=True)
459 459 elif self._delaybuf:
460 460 fp = self.opener(self.indexfile, b'a', checkambig=True)
461 461 fp.write(b"".join(self._delaybuf))
462 462 fp.close()
463 463 self._delaybuf = None
464 464 self._divert = False
465 465 # split when we're done
466 466 self._enforceinlinesize(tr)
467 467
468 468 def _writepending(self, tr):
469 469 """create a file containing the unfinalized state for
470 470 pretxnchangegroup"""
471 471 if self._delaybuf:
472 472 # make a temporary copy of the index
473 473 fp1 = self._realopener(self.indexfile)
474 474 pendingfilename = self.indexfile + b".a"
475 475 # register as a temp file to ensure cleanup on failure
476 476 tr.registertmp(pendingfilename)
477 477 # write existing data
478 478 fp2 = self._realopener(pendingfilename, b"w")
479 479 fp2.write(fp1.read())
480 480 # add pending data
481 481 fp2.write(b"".join(self._delaybuf))
482 482 fp2.close()
483 483 # switch modes so finalize can simply rename
484 484 self._delaybuf = None
485 485 self._divert = True
486 486 self.opener = _divertopener(self._realopener, self.indexfile)
487 487
488 488 if self._divert:
489 489 return True
490 490
491 491 return False
492 492
493 493 def _enforceinlinesize(self, tr, fp=None):
494 494 if not self._delayed:
495 495 revlog.revlog._enforceinlinesize(self, tr, fp)
496 496
497 497 def read(self, node):
498 498 """Obtain data from a parsed changelog revision.
499 499
500 500 Returns a 6-tuple of:
501 501
502 502 - manifest node in binary
503 503 - author/user as a localstr
504 504 - date as a 2-tuple of (time, timezone)
505 505 - list of files
506 506 - commit message as a localstr
507 507 - dict of extra metadata
508 508
509 509 Unless you need to access all fields, consider calling
510 510 ``changelogrevision`` instead, as it is faster for partial object
511 511 access.
512 512 """
513 513 d, s = self._revisiondata(node)
514 514 c = changelogrevision(
515 515 d, s, self._copiesstorage == b'changeset-sidedata'
516 516 )
517 517 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
518 518
519 519 def changelogrevision(self, nodeorrev):
520 520 """Obtain a ``changelogrevision`` for a node or revision."""
521 521 text, sidedata = self._revisiondata(nodeorrev)
522 522 return changelogrevision(
523 523 text, sidedata, self._copiesstorage == b'changeset-sidedata'
524 524 )
525 525
526 526 def readfiles(self, node):
527 527 """
528 528 short version of read that only returns the files modified by the cset
529 529 """
530 530 text = self.revision(node)
531 531 if not text:
532 532 return []
533 533 last = text.index(b"\n\n")
534 534 l = text[:last].split(b'\n')
535 535 return l[3:]
536 536
537 537 def add(
538 538 self,
539 539 manifest,
540 540 files,
541 541 desc,
542 542 transaction,
543 543 p1,
544 544 p2,
545 545 user,
546 546 date=None,
547 547 extra=None,
548 548 ):
549 549 # Convert to UTF-8 encoded bytestrings as the very first
550 550 # thing: calling any method on a localstr object will turn it
551 551 # into a str object and the cached UTF-8 string is thus lost.
552 552 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
553 553
554 554 user = user.strip()
555 555 # An empty username or a username with a "\n" will make the
556 556 # revision text contain two "\n\n" sequences -> corrupt
557 557 # repository since read cannot unpack the revision.
558 558 if not user:
559 559 raise error.StorageError(_(b"empty username"))
560 560 if b"\n" in user:
561 561 raise error.StorageError(
562 562 _(b"username %r contains a newline") % pycompat.bytestr(user)
563 563 )
564 564
565 565 desc = stripdesc(desc)
566 566
567 567 if date:
568 568 parseddate = b"%d %d" % dateutil.parsedate(date)
569 569 else:
570 570 parseddate = b"%d %d" % dateutil.makedate()
571 571 if extra:
572 572 branch = extra.get(b"branch")
573 573 if branch in (b"default", b""):
574 574 del extra[b"branch"]
575 575 elif branch in (b".", b"null", b"tip"):
576 576 raise error.StorageError(
577 577 _(b'the name \'%s\' is reserved') % branch
578 578 )
579 579 sortedfiles = sorted(files.touched)
580 580 sidedata = None
581 581 if self._copiesstorage == b'changeset-sidedata':
582 582 sidedata = metadata.encode_files_sidedata(files)
583 583
584 584 if extra:
585 585 extra = encodeextra(extra)
586 586 parseddate = b"%s %s" % (parseddate, extra)
587 587 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
588 588 text = b"\n".join(l)
589 589 return self.addrevision(
590 590 text, transaction, len(self), p1, p2, sidedata=sidedata
591 591 )
592 592
593 593 def branchinfo(self, rev):
594 594 """return the branch name and open/close state of a revision
595 595
596 596 This function exists because creating a changectx object
597 597 just to access this is costly."""
598 598 extra = self.read(rev)[5]
599 599 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
600 600
601 601 def _nodeduplicatecallback(self, transaction, node):
602 602         # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
603 603 #
604 604 # We track them in a list to preserve their order from the source bundle
605 605 duplicates = transaction.changes.setdefault(b'revduplicates', [])
606 606 duplicates.append(self.rev(node))
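Aside on the entry layout parsed by `changelogrevision.__new__` above: the format comment describes a manifest node, a user line, a date line, the file list, a blank line, then the description. Below is a minimal standalone sketch (not Mercurial API; the node value and file names are invented for illustration) that builds such an entry and recovers the same offsets the parser computes:

```python
# Hedged sketch of the changelog entry format documented above.
# Layout: manifest-node\nuser\ntime tz [extra]\nfiles...\n\ndescription
entry = b"\n".join([
    b"0123456789abcdef0123456789abcdef01234567",  # manifest node in ascii hex
    b"Alice <alice@example.com>",                  # user, no \n or \r allowed
    b"1600000000 0",                               # time and timezone (extra omitted)
    b"file-a",                                     # files touched by the cset
    b"file-b",
    b"",                                           # blank line before the description
    b"a commit description",                       # free text
])

nl1 = entry.index(b"\n")
nl2 = entry.index(b"\n", nl1 + 1)
nl3 = entry.index(b"\n", nl2 + 1)
doublenl = entry.index(b"\n\n", nl3 + 1)

assert entry[:nl1] == b"0123456789abcdef0123456789abcdef01234567"
assert entry[nl2 + 1 : nl3] == b"1600000000 0"
assert entry[nl3 + 1 : doublenl].split(b"\n") == [b"file-a", b"file-b"]
assert entry[doublenl + 2 :] == b"a commit description"
```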
@@ -1,620 +1,620 @@ mercurial/metadata.py
1 1 # metadata.py -- code related to various metadata computation and access.
2 2 #
3 3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import multiprocessing
11 11 import struct
12 12
13 13 from . import (
14 14 error,
15 15 node,
16 16 pycompat,
17 17 util,
18 18 )
19 19
20 20 from .revlogutils import (
21 21 flagutil as sidedataflag,
22 22 sidedata as sidedatamod,
23 23 )
24 24
25 25
26 26 class ChangingFiles(object):
27 27 """A class recording the changes made to files by a changeset
28 28
29 29     Actions performed on files are gathered into 4 sets:
30 30
31 31 - added: files actively added in the changeset.
32 32 - merged: files whose history got merged
33 33 - removed: files removed in the revision
34 34     - touched: files affected by the changeset
35 35
36 36     and copy information is held by 2 mappings
37 37
38 38 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
39 39 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
40 40
41 41 See their inline help for details.
42 42 """
43 43
44 44 def __init__(
45 45 self,
46 46 touched=None,
47 47 added=None,
48 48 removed=None,
49 49 merged=None,
50 50 p1_copies=None,
51 51 p2_copies=None,
52 52 ):
53 53 self._added = set(() if added is None else added)
54 54 self._merged = set(() if merged is None else merged)
55 55 self._removed = set(() if removed is None else removed)
56 56 self._touched = set(() if touched is None else touched)
57 57 self._touched.update(self._added)
58 58 self._touched.update(self._merged)
59 59 self._touched.update(self._removed)
60 60 self._p1_copies = dict(() if p1_copies is None else p1_copies)
61 61 self._p2_copies = dict(() if p2_copies is None else p2_copies)
62 62
63 63 def __eq__(self, other):
64 64 return (
65 65 self.added == other.added
66 66 and self.merged == other.merged
67 67 and self.removed == other.removed
68 68 and self.touched == other.touched
69 69 and self.copied_from_p1 == other.copied_from_p1
70 70 and self.copied_from_p2 == other.copied_from_p2
71 71 )
72 72
73 73 @util.propertycache
74 74 def added(self):
75 75 """files actively added in the changeset
76 76
77 77 Any file present in that revision that was absent in all the changeset's
78 78 parents.
79 79
80 80 In case of merge, this means a file absent in one of the parents but
81 81 existing in the other will *not* be contained in this set. (They were
82 82 added by an ancestor)
83 83 """
84 84 return frozenset(self._added)
85 85
86 86 def mark_added(self, filename):
87 87 if 'added' in vars(self):
88 88 del self.added
89 89 self._added.add(filename)
90 90 self.mark_touched(filename)
91 91
92 92 def update_added(self, filenames):
93 93 for f in filenames:
94 94 self.mark_added(f)
95 95
96 96 @util.propertycache
97 97 def merged(self):
98 98 """files actively merged during a merge
99 99
100 100         Any modified file which had modifications on both sides that needed merging.
101 101
102 102 In this case a new filenode was created and it has two parents.
103 103 """
104 104 return frozenset(self._merged)
105 105
106 106 def mark_merged(self, filename):
107 107 if 'merged' in vars(self):
108 108 del self.merged
109 109 self._merged.add(filename)
110 110 self.mark_touched(filename)
111 111
112 112 def update_merged(self, filenames):
113 113 for f in filenames:
114 114 self.mark_merged(f)
115 115
116 116 @util.propertycache
117 117 def removed(self):
118 118 """files actively removed by the changeset
119 119
120 120 In case of merge this will only contain the set of files removing "new"
121 121 content. For any file absent in the current changeset:
122 122
123 123 a) If the file exists in both parents, it is clearly "actively" removed
124 124 by this changeset.
125 125
126 126 b) If a file exists in only one parent and in none of the common
127 127 ancestors, then the file was newly added in one of the merged branches
128 128 and then got "actively" removed.
129 129
130 130 c) If a file exists in only one parent and at least one of the common
131 131 ancestors using the same filenode, then the file was unchanged on one
132 132 side and deleted on the other side. The merge "passively" propagated
133 133 that deletion, but didn't "actively" remove the file. In this case the
134 134 file is *not* included in the `removed` set.
135 135
136 136 d) If a file exists in only one parent and at least one of the common
137 137 ancestors using a different filenode, then the file was changed on one
138 138 side and removed on the other side. The merge process "actively"
139 139 decided to drop the new change and delete the file. Unlike in the
140 140         previous case, (c), the file is included in the `removed` set.
141 141
142 142 Summary table for merge:
143 143
144 144 case | exists in parents | exists in gca || removed
145 145 (a) | both | * || yes
146 146 (b) | one | none || yes
147 147 (c) | one | same filenode || no
148 148 (d) | one | new filenode || yes
149 149 """
150 150 return frozenset(self._removed)
151 151
152 152 def mark_removed(self, filename):
153 153 if 'removed' in vars(self):
154 154 del self.removed
155 155 self._removed.add(filename)
156 156 self.mark_touched(filename)
157 157
158 158 def update_removed(self, filenames):
159 159 for f in filenames:
160 160 self.mark_removed(f)
161 161
162 162 @util.propertycache
163 163 def touched(self):
164 164 """files either actively modified, added or removed"""
165 165 return frozenset(self._touched)
166 166
167 167 def mark_touched(self, filename):
168 168 if 'touched' in vars(self):
169 169 del self.touched
170 170 self._touched.add(filename)
171 171
172 172 def update_touched(self, filenames):
173 173 for f in filenames:
174 174 self.mark_touched(f)
175 175
176 176 @util.propertycache
177 177 def copied_from_p1(self):
178 178 return self._p1_copies.copy()
179 179
180 180 def mark_copied_from_p1(self, source, dest):
181 181 if 'copied_from_p1' in vars(self):
182 182 del self.copied_from_p1
183 183 self._p1_copies[dest] = source
184 184
185 185 def update_copies_from_p1(self, copies):
186 186 for dest, source in copies.items():
187 187 self.mark_copied_from_p1(source, dest)
188 188
189 189 @util.propertycache
190 190 def copied_from_p2(self):
191 191 return self._p2_copies.copy()
192 192
193 193 def mark_copied_from_p2(self, source, dest):
194 194 if 'copied_from_p2' in vars(self):
195 195 del self.copied_from_p2
196 196 self._p2_copies[dest] = source
197 197
198 198 def update_copies_from_p2(self, copies):
199 199 for dest, source in copies.items():
200 200 self.mark_copied_from_p2(source, dest)
201 201
202 202
203 203 def computechangesetfilesadded(ctx):
204 204 """return the list of files added in a changeset
205 205 """
206 206 added = []
207 207 for f in ctx.files():
208 208 if not any(f in p for p in ctx.parents()):
209 209 added.append(f)
210 210 return added
211 211
212 212
213 213 def get_removal_filter(ctx, x=None):
214 214 """return a function to detect files "wrongly" detected as `removed`
215 215
216 216 When a file is removed relative to p1 in a merge, this
217 217 function determines whether the absence is due to a
218 218 deletion from a parent, or whether the merge commit
219 219 itself deletes the file. We decide this by doing a
220 220 simplified three way merge of the manifest entry for
221 221 the file. There are two ways we decide the merge
222 222 itself didn't delete a file:
223 223 - neither parent (nor the merge) contain the file
224 224 - exactly one parent contains the file, and that
225 225 parent has the same filelog entry as the merge
226 226     ancestor (or all of them if there are two). In other
227 227 words, that parent left the file unchanged while the
228 228 other one deleted it.
229 229 One way to think about this is that deleting a file is
230 230 similar to emptying it, so the list of changed files
231 231 should be similar either way. The computation
232 232 described above is not done directly in _filecommit
233 233 when creating the list of changed files, however
234 234 it does something very similar by comparing filelog
235 235 nodes.
236 236 """
237 237
238 238 if x is not None:
239 239 p1, p2, m1, m2 = x
240 240 else:
241 241 p1 = ctx.p1()
242 242 p2 = ctx.p2()
243 243 m1 = p1.manifest()
244 244 m2 = p2.manifest()
245 245
246 246 @util.cachefunc
247 247 def mas():
248 248 p1n = p1.node()
249 249 p2n = p2.node()
250 250 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
251 251 if not cahs:
252 252 cahs = [node.nullrev]
253 253 return [ctx.repo()[r].manifest() for r in cahs]
254 254
255 255 def deletionfromparent(f):
256 256 if f in m1:
257 257 return f not in m2 and all(
258 258 f in ma and ma.find(f) == m1.find(f) for ma in mas()
259 259 )
260 260 elif f in m2:
261 261 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
262 262 else:
263 263 return True
264 264
265 265 return deletionfromparent
266 266
267 267
268 268 def computechangesetfilesremoved(ctx):
269 269 """return the list of files removed in a changeset
270 270 """
271 271 removed = []
272 272 for f in ctx.files():
273 273 if f not in ctx:
274 274 removed.append(f)
275 275 if removed:
276 276 rf = get_removal_filter(ctx)
277 277 removed = [r for r in removed if not rf(r)]
278 278 return removed
279 279
280 280
281 281 def computechangesetfilesmerged(ctx):
282 282 """return the list of files merged in a changeset
283 283 """
284 284 merged = []
285 285 if len(ctx.parents()) < 2:
286 286 return merged
287 287 for f in ctx.files():
288 288 if f in ctx:
289 289 fctx = ctx[f]
290 290 parents = fctx._filelog.parents(fctx._filenode)
291 291 if parents[1] != node.nullid:
292 292 merged.append(f)
293 293 return merged
294 294
295 295
296 296 def computechangesetcopies(ctx):
297 297 """return the copies data for a changeset
298 298
299 299     The copies data are returned as a pair of dictionaries (p1copies, p2copies).
300 300
301 301     Each dictionary is of the form: `{newname: oldname}`
302 302 """
303 303 p1copies = {}
304 304 p2copies = {}
305 305 p1 = ctx.p1()
306 306 p2 = ctx.p2()
307 307 narrowmatch = ctx._repo.narrowmatch()
308 308 for dst in ctx.files():
309 309 if not narrowmatch(dst) or dst not in ctx:
310 310 continue
311 311 copied = ctx[dst].renamed()
312 312 if not copied:
313 313 continue
314 314 src, srcnode = copied
315 315 if src in p1 and p1[src].filenode() == srcnode:
316 316 p1copies[dst] = src
317 317 elif src in p2 and p2[src].filenode() == srcnode:
318 318 p2copies[dst] = src
319 319 return p1copies, p2copies
320 320
321 321
322 322 def encodecopies(files, copies):
323 323 items = []
324 324 for i, dst in enumerate(files):
325 325 if dst in copies:
326 326 items.append(b'%d\0%s' % (i, copies[dst]))
327 327 if len(items) != len(copies):
328 328 raise error.ProgrammingError(
329 329 b'some copy targets missing from file list'
330 330 )
331 331 return b"\n".join(items)
332 332
333 333
334 334 def decodecopies(files, data):
335 335 try:
336 336 copies = {}
337 337 if not data:
338 338 return copies
339 339 for l in data.split(b'\n'):
340 340 strindex, src = l.split(b'\0')
341 341 i = int(strindex)
342 342 dst = files[i]
343 343 copies[dst] = src
344 344 return copies
345 345 except (ValueError, IndexError):
346 346 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
347 347 # used different syntax for the value.
348 348 return None
349 349
350 350
351 351 def encodefileindices(files, subset):
352 352 subset = set(subset)
353 353 indices = []
354 354 for i, f in enumerate(files):
355 355 if f in subset:
356 356 indices.append(b'%d' % i)
357 357 return b'\n'.join(indices)
358 358
359 359
360 360 def decodefileindices(files, data):
361 361 try:
362 362 subset = []
363 363 if not data:
364 364 return subset
365 365 for strindex in data.split(b'\n'):
366 366 i = int(strindex)
367 367 if i < 0 or i >= len(files):
368 368 return None
369 369 subset.append(files[i])
370 370 return subset
371 371 except (ValueError, IndexError):
372 372 # Perhaps someone had chosen the same key name (e.g. "added") and
373 373 # used different syntax for the value.
374 374 return None
375 375
376 376
377 377 # see mercurial/helptext/internals/revlogs.txt for details about the format
378 378
379 379 ACTION_MASK = int("111" "00", 2)
380 380 # note: an untouched file used as copy source will appear as `000` for this mask.
381 381 ADDED_FLAG = int("001" "00", 2)
382 382 MERGED_FLAG = int("010" "00", 2)
383 383 REMOVED_FLAG = int("011" "00", 2)
384 384 # `100` is reserved for future use
385 385 TOUCHED_FLAG = int("101" "00", 2)
386 386
387 387 COPIED_MASK = int("11", 2)
388 388 COPIED_FROM_P1_FLAG = int("10", 2)
389 389 COPIED_FROM_P2_FLAG = int("11", 2)
390 390
391 391 # structure is <flag><filename-end><copy-source>
392 392 INDEX_HEADER = struct.Struct(">L")
393 393 INDEX_ENTRY = struct.Struct(">bLL")
394 394
395 395
396 396 def encode_files_sidedata(files):
397 397 all_files = set(files.touched)
398 398 all_files.update(files.copied_from_p1.values())
399 399 all_files.update(files.copied_from_p2.values())
400 400 all_files = sorted(all_files)
401 401 file_idx = {f: i for (i, f) in enumerate(all_files)}
402 402 file_idx[None] = 0
403 403
404 404 chunks = [INDEX_HEADER.pack(len(all_files))]
405 405
406 406 filename_length = 0
407 407 for f in all_files:
408 408 filename_size = len(f)
409 409 filename_length += filename_size
410 410 flag = 0
411 411 if f in files.added:
412 412 flag |= ADDED_FLAG
413 413 elif f in files.merged:
414 414 flag |= MERGED_FLAG
415 415 elif f in files.removed:
416 416 flag |= REMOVED_FLAG
417 417 elif f in files.touched:
418 418 flag |= TOUCHED_FLAG
419 419
420 420 copy = None
421 421 if f in files.copied_from_p1:
422 422 flag |= COPIED_FROM_P1_FLAG
423 423 copy = files.copied_from_p1.get(f)
424 424 elif f in files.copied_from_p2:
425 425 copy = files.copied_from_p2.get(f)
426 426 flag |= COPIED_FROM_P2_FLAG
427 427 copy_idx = file_idx[copy]
428 428 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
429 429 chunks.extend(all_files)
430 430 return {sidedatamod.SD_FILES: b''.join(chunks)}
431 431
432 432
433 def decode_files_sidedata(changelogrevision, sidedata):
433 def decode_files_sidedata(sidedata):
434 434 md = ChangingFiles()
435 435 raw = sidedata.get(sidedatamod.SD_FILES)
436 436
437 437 if raw is None:
438 438 return md
439 439
440 440 copies = []
441 441 all_files = []
442 442
443 443 assert len(raw) >= INDEX_HEADER.size
444 444 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
445 445
446 446 offset = INDEX_HEADER.size
447 447 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
448 448 file_offset_last = file_offset_base
449 449
450 450 assert len(raw) >= file_offset_base
451 451
452 452 for idx in range(total_files):
453 453 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
454 454 file_end += file_offset_base
455 455 filename = raw[file_offset_last:file_end]
456 456 filesize = file_end - file_offset_last
457 457 assert len(filename) == filesize
458 458 offset += INDEX_ENTRY.size
459 459 file_offset_last = file_end
460 460 all_files.append(filename)
461 461 if flag & ACTION_MASK == ADDED_FLAG:
462 462 md.mark_added(filename)
463 463 elif flag & ACTION_MASK == MERGED_FLAG:
464 464 md.mark_merged(filename)
465 465 elif flag & ACTION_MASK == REMOVED_FLAG:
466 466 md.mark_removed(filename)
467 467 elif flag & ACTION_MASK == TOUCHED_FLAG:
468 468 md.mark_touched(filename)
469 469
470 470 copied = None
471 471 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
472 472 copied = md.mark_copied_from_p1
473 473 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
474 474 copied = md.mark_copied_from_p2
475 475
476 476 if copied is not None:
477 477 copies.append((copied, filename, copy_idx))
478 478
479 479 for copied, filename, copy_idx in copies:
480 480 copied(all_files[copy_idx], filename)
481 481
482 482 return md
483 483
484 484
485 485 def _getsidedata(srcrepo, rev):
486 486 ctx = srcrepo[rev]
487 487 filescopies = computechangesetcopies(ctx)
488 488 filesadded = computechangesetfilesadded(ctx)
489 489 filesremoved = computechangesetfilesremoved(ctx)
490 490 filesmerged = computechangesetfilesmerged(ctx)
491 491 files = ChangingFiles()
492 492 files.update_touched(ctx.files())
493 493 files.update_added(filesadded)
494 494 files.update_removed(filesremoved)
495 495 files.update_merged(filesmerged)
496 496 files.update_copies_from_p1(filescopies[0])
497 497 files.update_copies_from_p2(filescopies[1])
498 498 return encode_files_sidedata(files)
499 499
500 500
501 501 def getsidedataadder(srcrepo, destrepo):
502 502 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
503 503 if pycompat.iswindows or not use_w:
504 504 return _get_simple_sidedata_adder(srcrepo, destrepo)
505 505 else:
506 506 return _get_worker_sidedata_adder(srcrepo, destrepo)
507 507
508 508
509 509 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
510 510 """The function used by worker precomputing sidedata
511 511
512 512     It reads an input queue containing revision numbers.
513 513     It writes (rev, <sidedata-map>) pairs to an output queue.
514 514
515 515 The `None` input value is used as a stop signal.
516 516
517 517     The `tokens` semaphore is used to avoid having too many unprocessed
518 518     entries. A worker needs to acquire one token before fetching a task.
519 519 They will be released by the consumer of the produced data.
520 520 """
521 521 tokens.acquire()
522 522 rev = revs_queue.get()
523 523 while rev is not None:
524 524 data = _getsidedata(srcrepo, rev)
525 525 sidedata_queue.put((rev, data))
526 526 tokens.acquire()
527 527 rev = revs_queue.get()
528 528 # processing of `None` is completed, release the token.
529 529 tokens.release()
530 530
531 531
532 532 BUFF_PER_WORKER = 50
533 533
534 534
535 535 def _get_worker_sidedata_adder(srcrepo, destrepo):
536 536 """The parallel version of the sidedata computation
537 537
538 538     This code spawns a pool of workers that precompute a buffer of sidedata
539 539 before we actually need them"""
540 540 # avoid circular import copies -> scmutil -> worker -> copies
541 541 from . import worker
542 542
543 543 nbworkers = worker._numworkers(srcrepo.ui)
544 544
545 545 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
546 546 revsq = multiprocessing.Queue()
547 547 sidedataq = multiprocessing.Queue()
548 548
549 549 assert srcrepo.filtername is None
550 550     # queue all tasks beforehand, revision numbers are small and it makes
551 551 # synchronisation simpler
552 552 #
553 553 # Since the computation for each node can be quite expensive, the overhead
554 554     # of using a single queue is not relevant. In practice, most computations
555 555     # are fast but some are very expensive and dominate all the other smaller
556 556     # costs.
557 557 for r in srcrepo.changelog.revs():
558 558 revsq.put(r)
559 559 # queue the "no more tasks" markers
560 560 for i in range(nbworkers):
561 561 revsq.put(None)
562 562
563 563 allworkers = []
564 564 for i in range(nbworkers):
565 565 args = (srcrepo, revsq, sidedataq, tokens)
566 566 w = multiprocessing.Process(target=_sidedata_worker, args=args)
567 567 allworkers.append(w)
568 568 w.start()
569 569
570 570     # dictionary to store results for revisions higher than the one we are
571 571     # looking for. For example, if we need the sidedata map for 42 and 43 is
572 572     # received, we shelve 43 for later use.
573 573 staging = {}
574 574
575 575 def sidedata_companion(revlog, rev):
576 576 sidedata = {}
577 577 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
578 578             # Is the data previously shelved?
579 579 sidedata = staging.pop(rev, None)
580 580 if sidedata is None:
581 581                 # look at the queued result until we find the one we are looking
582 582 # for (shelve the other ones)
583 583 r, sidedata = sidedataq.get()
584 584 while r != rev:
585 585 staging[r] = sidedata
586 586 r, sidedata = sidedataq.get()
587 587 tokens.release()
588 588 return False, (), sidedata
589 589
590 590 return sidedata_companion
591 591
592 592
593 593 def _get_simple_sidedata_adder(srcrepo, destrepo):
594 594 """The simple version of the sidedata computation
595 595
596 596     It just computes it in the same thread on request"""
597 597
598 598 def sidedatacompanion(revlog, rev):
599 599 sidedata = {}
600 600 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
601 601 sidedata = _getsidedata(srcrepo, rev)
602 602 return False, (), sidedata
603 603
604 604 return sidedatacompanion
605 605
606 606
607 607 def getsidedataremover(srcrepo, destrepo):
608 608 def sidedatacompanion(revlog, rev):
609 609 f = ()
610 610 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
611 611 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
612 612 f = (
613 613 sidedatamod.SD_P1COPIES,
614 614 sidedatamod.SD_P2COPIES,
615 615 sidedatamod.SD_FILESADDED,
616 616 sidedatamod.SD_FILESREMOVED,
617 617 )
618 618 return False, f, {}
619 619
620 620 return sidedatacompanion
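As a worked illustration of the sidedata block layout used by `encode_files_sidedata` and `decode_files_sidedata` above (a `>L` file count, one `>bLL` flag/filename-end/copy-index entry per file, then the sorted filenames concatenated), here is a hedged standalone sketch; the file names are invented and the flag constants are copied from the diff:

```python
import struct

INDEX_HEADER = struct.Struct(">L")   # number of files
INDEX_ENTRY = struct.Struct(">bLL")  # flag, cumulative filename end, copy-source index

ADDED_FLAG = int("001" "00", 2)
COPIED_FROM_P1_FLAG = int("10", 2)

all_files = [b"copied", b"source"]  # sorted; "copied" is a copy of "source"
chunks = [INDEX_HEADER.pack(len(all_files))]
end = 0
for name in all_files:
    end += len(name)
    if name == b"copied":
        # added in this changeset, copied from "source" (index 1)
        chunks.append(INDEX_ENTRY.pack(ADDED_FLAG | COPIED_FROM_P1_FLAG, end, 1))
    else:
        # untouched copy source: action bits stay `000`, as noted above
        chunks.append(INDEX_ENTRY.pack(0, end, 0))
chunks.extend(all_files)
raw = b"".join(chunks)

# Walk the offsets back the same way decode_files_sidedata does.
total = INDEX_HEADER.unpack_from(raw, 0)[0]
base = INDEX_HEADER.size + INDEX_ENTRY.size * total
last = base
for i in range(total):
    flag, fend, copy_idx = INDEX_ENTRY.unpack_from(
        raw, INDEX_HEADER.size + i * INDEX_ENTRY.size
    )
    print(raw[last : base + fend], bin(flag), copy_idx)
    last = base + fend
# prints: b'copied' 0b110 1, then b'source' 0b0 0
```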