commitctx: extract sidedata encoding inside its own function...
marmoute
r45885:c6eea580 default
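This changeset moves the inline sidedata-encoding block out of changelog.add and into a new metadata.encode_copies_sidedata helper. A minimal usage sketch of the extracted helper, assuming the mercurial package from this changeset is importable (file names and values below are illustrative only):

from mercurial import metadata
from mercurial.revlogutils import sidedata as sidedatamod

files = metadata.ChangingFiles(
    touched=(b'a.txt', b'b.txt'),    # files modified by the changeset
    added=(b'b.txt',),               # b.txt is new in this changeset
    p1_copies={b'b.txt': b'a.txt'},  # b.txt was copied from a.txt in p1
)

sidedata = metadata.encode_copies_sidedata(files)
# indices refer to the sorted list of touched files: 0 -> a.txt, 1 -> b.txt
assert sidedata[sidedatamod.SD_P1COPIES] == b'1\x00a.txt'
assert sidedata[sidedatamod.SD_FILESADDED] == b'1'
# when nothing is recorded, the helper returns None rather than an empty dict
assert metadata.encode_copies_sidedata(metadata.ChangingFiles()) is None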
@@ -1,605 +1,585 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 from .revlogutils import sidedata as sidedatamod
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
55 55 def _string_unescape(text):
56 56 if b'\\0' in text:
57 57 # fix up \0 without getting into trouble with \\0
58 58 text = text.replace(b'\\\\', b'\\\\\n')
59 59 text = text.replace(b'\\0', b'\0')
60 60 text = text.replace(b'\n', b'')
61 61 return stringutil.unescapestr(text)
62 62
63 63
64 64 def decodeextra(text):
65 65 """
66 66 >>> from .pycompat import bytechr as chr
67 67 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 68 ... ).items())
69 69 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 70 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 71 ... b'baz': chr(92) + chr(0) + b'2'})
72 72 ... ).items())
73 73 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 74 """
75 75 extra = _defaultextra.copy()
76 76 for l in text.split(b'\0'):
77 77 if l:
78 78 k, v = _string_unescape(l).split(b':', 1)
79 79 extra[k] = v
80 80 return extra
81 81
82 82
83 83 def encodeextra(d):
84 84 # keys must be sorted to produce a deterministic changelog entry
85 85 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 86 return b"\0".join(items)
87 87
88 88
89 89 def stripdesc(desc):
90 90 """strip trailing whitespace and leading and trailing empty lines"""
91 91 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 92
93 93
94 94 class appender(object):
95 95 '''the changelog index must be updated last on disk, so we use this class
96 96 to delay writes to it'''
97 97
98 98 def __init__(self, vfs, name, mode, buf):
99 99 self.data = buf
100 100 fp = vfs(name, mode)
101 101 self.fp = fp
102 102 self.offset = fp.tell()
103 103 self.size = vfs.fstat(fp).st_size
104 104 self._end = self.size
105 105
106 106 def end(self):
107 107 return self._end
108 108
109 109 def tell(self):
110 110 return self.offset
111 111
112 112 def flush(self):
113 113 pass
114 114
115 115 @property
116 116 def closed(self):
117 117 return self.fp.closed
118 118
119 119 def close(self):
120 120 self.fp.close()
121 121
122 122 def seek(self, offset, whence=0):
123 123 '''virtual file offset spans real file and data'''
124 124 if whence == 0:
125 125 self.offset = offset
126 126 elif whence == 1:
127 127 self.offset += offset
128 128 elif whence == 2:
129 129 self.offset = self.end() + offset
130 130 if self.offset < self.size:
131 131 self.fp.seek(self.offset)
132 132
133 133 def read(self, count=-1):
134 134 '''only trick here is reads that span real file and data'''
135 135 ret = b""
136 136 if self.offset < self.size:
137 137 s = self.fp.read(count)
138 138 ret = s
139 139 self.offset += len(s)
140 140 if count > 0:
141 141 count -= len(s)
142 142 if count != 0:
143 143 doff = self.offset - self.size
144 144 self.data.insert(0, b"".join(self.data))
145 145 del self.data[1:]
146 146 s = self.data[0][doff : doff + count]
147 147 self.offset += len(s)
148 148 ret += s
149 149 return ret
150 150
151 151 def write(self, s):
152 152 self.data.append(bytes(s))
153 153 self.offset += len(s)
154 154 self._end += len(s)
155 155
156 156 def __enter__(self):
157 157 self.fp.__enter__()
158 158 return self
159 159
160 160 def __exit__(self, *args):
161 161 return self.fp.__exit__(*args)
162 162
163 163
164 164 class _divertopener(object):
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 def _delayopener(opener, target, buf):
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 182 if name != target:
183 183 return opener(name, mode, **kwargs)
184 184 assert not kwargs
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
190 190 @attr.s
191 191 class _changelogrevision(object):
192 192 # Extensions might modify _defaultextra, so let the constructor below pass
193 193 # it in
194 194 extra = attr.ib()
195 195 manifest = attr.ib(default=nullid)
196 196 user = attr.ib(default=b'')
197 197 date = attr.ib(default=(0, 0))
198 198 files = attr.ib(default=attr.Factory(list))
199 199 filesadded = attr.ib(default=None)
200 200 filesremoved = attr.ib(default=None)
201 201 p1copies = attr.ib(default=None)
202 202 p2copies = attr.ib(default=None)
203 203 description = attr.ib(default=b'')
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 '_offsets',
216 216 '_text',
217 217 '_sidedata',
218 218 '_cpsd',
219 219 )
220 220
221 221 def __new__(cls, text, sidedata, cpsd):
222 222 if not text:
223 223 return _changelogrevision(extra=_defaultextra)
224 224
225 225 self = super(changelogrevision, cls).__new__(cls)
226 226 # We could return here and implement the following as an __init__.
227 227 # But doing it here is equivalent and saves an extra function call.
228 228
229 229 # format used:
230 230 # nodeid\n : manifest node in ascii
231 231 # user\n : user, no \n or \r allowed
232 232 # time tz extra\n : date (time is int or float, timezone is int)
233 233 # : extra is metadata, encoded and separated by '\0'
234 234 # : older versions ignore it
235 235 # files\n\n : files modified by the cset, no \n or \r allowed
236 236 # (.*) : comment (free text, ideally utf-8)
237 237 #
238 238 # changelog v0 doesn't use extra
239 239
240 240 nl1 = text.index(b'\n')
241 241 nl2 = text.index(b'\n', nl1 + 1)
242 242 nl3 = text.index(b'\n', nl2 + 1)
243 243
244 244 # The list of files may be empty, in which case nl3 is the first of the
245 245 # double newline that precedes the description.
246 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 247 doublenl = nl3
248 248 else:
249 249 doublenl = text.index(b'\n\n', nl3 + 1)
250 250
251 251 self._offsets = (nl1, nl2, nl3, doublenl)
252 252 self._text = text
253 253 self._sidedata = sidedata
254 254 self._cpsd = cpsd
255 255
256 256 return self
257 257
258 258 @property
259 259 def manifest(self):
260 260 return bin(self._text[0 : self._offsets[0]])
261 261
262 262 @property
263 263 def user(self):
264 264 off = self._offsets
265 265 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
266 266
267 267 @property
268 268 def _rawdate(self):
269 269 off = self._offsets
270 270 dateextra = self._text[off[1] + 1 : off[2]]
271 271 return dateextra.split(b' ', 2)[0:2]
272 272
273 273 @property
274 274 def _rawextra(self):
275 275 off = self._offsets
276 276 dateextra = self._text[off[1] + 1 : off[2]]
277 277 fields = dateextra.split(b' ', 2)
278 278 if len(fields) != 3:
279 279 return None
280 280
281 281 return fields[2]
282 282
283 283 @property
284 284 def date(self):
285 285 raw = self._rawdate
286 286 time = float(raw[0])
287 287 # Various tools did silly things with the timezone.
288 288 try:
289 289 timezone = int(raw[1])
290 290 except ValueError:
291 291 timezone = 0
292 292
293 293 return time, timezone
294 294
295 295 @property
296 296 def extra(self):
297 297 raw = self._rawextra
298 298 if raw is None:
299 299 return _defaultextra
300 300
301 301 return decodeextra(raw)
302 302
303 303 @property
304 304 def files(self):
305 305 off = self._offsets
306 306 if off[2] == off[3]:
307 307 return []
308 308
309 309 return self._text[off[2] + 1 : off[3]].split(b'\n')
310 310
311 311 @property
312 312 def filesadded(self):
313 313 if self._cpsd:
314 314 rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
315 315 if not rawindices:
316 316 return []
317 317 else:
318 318 rawindices = self.extra.get(b'filesadded')
319 319 if rawindices is None:
320 320 return None
321 321 return metadata.decodefileindices(self.files, rawindices)
322 322
323 323 @property
324 324 def filesremoved(self):
325 325 if self._cpsd:
326 326 rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
327 327 if not rawindices:
328 328 return []
329 329 else:
330 330 rawindices = self.extra.get(b'filesremoved')
331 331 if rawindices is None:
332 332 return None
333 333 return metadata.decodefileindices(self.files, rawindices)
334 334
335 335 @property
336 336 def p1copies(self):
337 337 if self._cpsd:
338 338 rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
339 339 if not rawcopies:
340 340 return {}
341 341 else:
342 342 rawcopies = self.extra.get(b'p1copies')
343 343 if rawcopies is None:
344 344 return None
345 345 return metadata.decodecopies(self.files, rawcopies)
346 346
347 347 @property
348 348 def p2copies(self):
349 349 if self._cpsd:
350 350 rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
351 351 if not rawcopies:
352 352 return {}
353 353 else:
354 354 rawcopies = self.extra.get(b'p2copies')
355 355 if rawcopies is None:
356 356 return None
357 357 return metadata.decodecopies(self.files, rawcopies)
358 358
359 359 @property
360 360 def description(self):
361 361 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
362 362
363 363
364 364 class changelog(revlog.revlog):
365 365 def __init__(self, opener, trypending=False):
366 366 """Load a changelog revlog using an opener.
367 367
368 368 If ``trypending`` is true, we attempt to load the index from a
369 369 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
370 370 The ``00changelog.i.a`` file contains index (and possibly inline
371 371 revision) data for a transaction that hasn't been finalized yet.
372 372 It exists in a separate file to facilitate readers (such as
373 373 hook processes) accessing data before a transaction is finalized.
374 374 """
375 375 if trypending and opener.exists(b'00changelog.i.a'):
376 376 indexfile = b'00changelog.i.a'
377 377 else:
378 378 indexfile = b'00changelog.i'
379 379
380 380 datafile = b'00changelog.d'
381 381 revlog.revlog.__init__(
382 382 self,
383 383 opener,
384 384 indexfile,
385 385 datafile=datafile,
386 386 checkambig=True,
387 387 mmaplargeindex=True,
388 388 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
389 389 )
390 390
391 391 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
392 392 # changelogs don't benefit from generaldelta.
393 393
394 394 self.version &= ~revlog.FLAG_GENERALDELTA
395 395 self._generaldelta = False
396 396
397 397 # Delta chains for changelogs tend to be very small because entries
398 398 # tend to be small and don't delta well with each other. So disable delta
399 399 # chains.
400 400 self._storedeltachains = False
401 401
402 402 self._realopener = opener
403 403 self._delayed = False
404 404 self._delaybuf = None
405 405 self._divert = False
406 406 self.filteredrevs = frozenset()
407 407 self._copiesstorage = opener.options.get(b'copies-storage')
408 408
409 409 def delayupdate(self, tr):
410 410 """delay visibility of index updates to other readers"""
411 411
412 412 if not self._delayed:
413 413 if len(self) == 0:
414 414 self._divert = True
415 415 if self._realopener.exists(self.indexfile + b'.a'):
416 416 self._realopener.unlink(self.indexfile + b'.a')
417 417 self.opener = _divertopener(self._realopener, self.indexfile)
418 418 else:
419 419 self._delaybuf = []
420 420 self.opener = _delayopener(
421 421 self._realopener, self.indexfile, self._delaybuf
422 422 )
423 423 self._delayed = True
424 424 tr.addpending(b'cl-%i' % id(self), self._writepending)
425 425 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
426 426
427 427 def _finalize(self, tr):
428 428 """finalize index updates"""
429 429 self._delayed = False
430 430 self.opener = self._realopener
431 431 # move redirected index data back into place
432 432 if self._divert:
433 433 assert not self._delaybuf
434 434 tmpname = self.indexfile + b".a"
435 435 nfile = self.opener.open(tmpname)
436 436 nfile.close()
437 437 self.opener.rename(tmpname, self.indexfile, checkambig=True)
438 438 elif self._delaybuf:
439 439 fp = self.opener(self.indexfile, b'a', checkambig=True)
440 440 fp.write(b"".join(self._delaybuf))
441 441 fp.close()
442 442 self._delaybuf = None
443 443 self._divert = False
444 444 # split when we're done
445 445 self._enforceinlinesize(tr)
446 446
447 447 def _writepending(self, tr):
448 448 """create a file containing the unfinalized state for
449 449 pretxnchangegroup"""
450 450 if self._delaybuf:
451 451 # make a temporary copy of the index
452 452 fp1 = self._realopener(self.indexfile)
453 453 pendingfilename = self.indexfile + b".a"
454 454 # register as a temp file to ensure cleanup on failure
455 455 tr.registertmp(pendingfilename)
456 456 # write existing data
457 457 fp2 = self._realopener(pendingfilename, b"w")
458 458 fp2.write(fp1.read())
459 459 # add pending data
460 460 fp2.write(b"".join(self._delaybuf))
461 461 fp2.close()
462 462 # switch modes so finalize can simply rename
463 463 self._delaybuf = None
464 464 self._divert = True
465 465 self.opener = _divertopener(self._realopener, self.indexfile)
466 466
467 467 if self._divert:
468 468 return True
469 469
470 470 return False
471 471
472 472 def _enforceinlinesize(self, tr, fp=None):
473 473 if not self._delayed:
474 474 revlog.revlog._enforceinlinesize(self, tr, fp)
475 475
476 476 def read(self, node):
477 477 """Obtain data from a parsed changelog revision.
478 478
479 479 Returns a 6-tuple of:
480 480
481 481 - manifest node in binary
482 482 - author/user as a localstr
483 483 - date as a 2-tuple of (time, timezone)
484 484 - list of files
485 485 - commit message as a localstr
486 486 - dict of extra metadata
487 487
488 488 Unless you need to access all fields, consider calling
489 489 ``changelogrevision`` instead, as it is faster for partial object
490 490 access.
491 491 """
492 492 d, s = self._revisiondata(node)
493 493 c = changelogrevision(
494 494 d, s, self._copiesstorage == b'changeset-sidedata'
495 495 )
496 496 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
497 497
498 498 def changelogrevision(self, nodeorrev):
499 499 """Obtain a ``changelogrevision`` for a node or revision."""
500 500 text, sidedata = self._revisiondata(nodeorrev)
501 501 return changelogrevision(
502 502 text, sidedata, self._copiesstorage == b'changeset-sidedata'
503 503 )
504 504
505 505 def readfiles(self, node):
506 506 """
507 507 short version of read that only returns the files modified by the cset
508 508 """
509 509 text = self.revision(node)
510 510 if not text:
511 511 return []
512 512 last = text.index(b"\n\n")
513 513 l = text[:last].split(b'\n')
514 514 return l[3:]
515 515
516 516 def add(
517 517 self,
518 518 manifest,
519 519 files,
520 520 desc,
521 521 transaction,
522 522 p1,
523 523 p2,
524 524 user,
525 525 date=None,
526 526 extra=None,
527 527 ):
528 528 # Convert to UTF-8 encoded bytestrings as the very first
529 529 # thing: calling any method on a localstr object will turn it
530 530 # into a str object and the cached UTF-8 string is thus lost.
531 531 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
532 532
533 533 user = user.strip()
534 534 # An empty username or a username with a "\n" will make the
535 535 # revision text contain two "\n\n" sequences -> corrupt
536 536 # repository since read cannot unpack the revision.
537 537 if not user:
538 538 raise error.StorageError(_(b"empty username"))
539 539 if b"\n" in user:
540 540 raise error.StorageError(
541 541 _(b"username %r contains a newline") % pycompat.bytestr(user)
542 542 )
543 543
544 544 desc = stripdesc(desc)
545 545
546 546 if date:
547 547 parseddate = b"%d %d" % dateutil.parsedate(date)
548 548 else:
549 549 parseddate = b"%d %d" % dateutil.makedate()
550 550 if extra:
551 551 branch = extra.get(b"branch")
552 552 if branch in (b"default", b""):
553 553 del extra[b"branch"]
554 554 elif branch in (b".", b"null", b"tip"):
555 555 raise error.StorageError(
556 556 _(b'the name \'%s\' is reserved') % branch
557 557 )
558 558 sortedfiles = sorted(files.touched)
559 559 sidedata = None
560 560 if self._copiesstorage == b'changeset-sidedata':
561 sidedata = {}
562 p1copies = files.copied_from_p1
563 if p1copies:
564 p1copies = metadata.encodecopies(sortedfiles, p1copies)
565 sidedata[sidedatamod.SD_P1COPIES] = p1copies
566 p2copies = files.copied_from_p2
567 if p2copies:
568 p2copies = metadata.encodecopies(sortedfiles, p2copies)
569 sidedata[sidedatamod.SD_P2COPIES] = p2copies
570 filesadded = files.added
571 if filesadded:
572 filesadded = metadata.encodefileindices(sortedfiles, filesadded)
573 sidedata[sidedatamod.SD_FILESADDED] = filesadded
574 filesremoved = files.removed
575 if filesremoved:
576 filesremoved = metadata.encodefileindices(
577 sortedfiles, filesremoved
578 )
579 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
580 if not sidedata:
581 sidedata = None
561 sidedata = metadata.encode_copies_sidedata(files)
582 562
583 563 if extra:
584 564 extra = encodeextra(extra)
585 565 parseddate = b"%s %s" % (parseddate, extra)
586 566 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
587 567 text = b"\n".join(l)
588 568 return self.addrevision(
589 569 text, transaction, len(self), p1, p2, sidedata=sidedata
590 570 )
591 571
592 572 def branchinfo(self, rev):
593 573 """return the branch name and open/close state of a revision
594 574
595 575 This function exists because creating a changectx object
596 576 just to access this is costly."""
597 577 extra = self.read(rev)[5]
598 578 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
599 579
600 580 def _nodeduplicatecallback(self, transaction, node):
601 581 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
602 582 #
603 583 # We track them in a list to preserve their order from the source bundle
604 584 duplicates = transaction.changes.setdefault(b'revduplicates', [])
605 585 duplicates.append(self.rev(node))
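The format comment in changelogrevision.__new__ above describes the raw changelog entry layout that the _offsets logic parses. A self-contained sketch of how those offsets are located (the entry text is illustrative, not taken from a real repository):

# layout: manifest-hex \n user \n "time tz [extra]" \n files... \n\n description
entry = (
    b'0123456789abcdef0123456789abcdef01234567\n'  # manifest node (hex)
    b'Alice <alice@example.com>\n'                  # user
    b'1600000000 0 branch:stable\n'                 # time, tz, encoded extra
    b'a.txt\n'                                      # touched files, one per line
    b'b.txt\n'
    b'\n'
    b'my commit message'                            # free-form description
)

nl1 = entry.index(b'\n')
nl2 = entry.index(b'\n', nl1 + 1)
nl3 = entry.index(b'\n', nl2 + 1)
# the file list may be empty, in which case nl3 already starts the
# double newline separating the headers from the description
if entry[nl3 + 1 : nl3 + 2] == b'\n':
    doublenl = nl3
else:
    doublenl = entry.index(b'\n\n', nl3 + 1)

assert entry[:nl1] == b'0123456789abcdef0123456789abcdef01234567'
assert entry[nl1 + 1 : nl2] == b'Alice <alice@example.com>'
assert entry[nl2 + 1 : nl3].split(b' ', 2)[:2] == [b'1600000000', b'0']
assert entry[nl3 + 1 : doublenl].split(b'\n') == [b'a.txt', b'b.txt']
assert entry[doublenl + 2 :] == b'my commit message'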
@@ -1,400 +1,424 @@
1 1 # metadata.py -- code related to various metadata computation and access.
2 2 #
3 3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import multiprocessing
11 11
12 12 from . import (
13 13 error,
14 14 node,
15 15 pycompat,
16 16 util,
17 17 )
18 18
19 19 from .revlogutils import (
20 20 flagutil as sidedataflag,
21 21 sidedata as sidedatamod,
22 22 )
23 23
24 24
25 25 class ChangingFiles(object):
26 26 """A class recording the changes made to a file by a revision
27 27 """
28 28
29 29 def __init__(
30 30 self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
31 31 ):
32 32 self._added = set(added)
33 33 self._removed = set(removed)
34 34 self._touched = set(touched)
35 35 self._touched.update(self._added)
36 36 self._touched.update(self._removed)
37 37 self._p1_copies = dict(p1_copies)
38 38 self._p2_copies = dict(p2_copies)
39 39
40 40 @property
41 41 def added(self):
42 42 return frozenset(self._added)
43 43
44 44 def mark_added(self, filename):
45 45 self._added.add(filename)
46 46 self._touched.add(filename)
47 47
48 48 def update_added(self, filenames):
49 49 for f in filenames:
50 50 self.mark_added(f)
51 51
52 52 @property
53 53 def removed(self):
54 54 return frozenset(self._removed)
55 55
56 56 def mark_removed(self, filename):
57 57 self._removed.add(filename)
58 58 self._touched.add(filename)
59 59
60 60 def update_removed(self, filenames):
61 61 for f in filenames:
62 62 self.mark_removed(f)
63 63
64 64 @property
65 65 def touched(self):
66 66 return frozenset(self._touched)
67 67
68 68 def mark_touched(self, filename):
69 69 self._touched.add(filename)
70 70
71 71 def update_touched(self, filenames):
72 72 for f in filenames:
73 73 self.mark_touched(f)
74 74
75 75 @property
76 76 def copied_from_p1(self):
77 77 return self._p1_copies.copy()
78 78
79 79 def mark_copied_from_p1(self, source, dest):
80 80 self._p1_copies[dest] = source
81 81
82 82 def update_copies_from_p1(self, copies):
83 83 for dest, source in copies.items():
84 84 self.mark_copied_from_p1(source, dest)
85 85
86 86 @property
87 87 def copied_from_p2(self):
88 88 return self._p2_copies.copy()
89 89
90 90 def mark_copied_from_p2(self, source, dest):
91 91 self._p2_copies[dest] = source
92 92
93 93 def update_copies_from_p2(self, copies):
94 94 for dest, source in copies.items():
95 95 self.mark_copied_from_p2(source, dest)
96 96
97 97
98 98 def computechangesetfilesadded(ctx):
99 99 """return the list of files added in a changeset
100 100 """
101 101 added = []
102 102 for f in ctx.files():
103 103 if not any(f in p for p in ctx.parents()):
104 104 added.append(f)
105 105 return added
106 106
107 107
108 108 def get_removal_filter(ctx, x=None):
109 109 """return a function to detect files "wrongly" detected as `removed`
110 110
111 111 When a file is removed relative to p1 in a merge, this
112 112 function determines whether the absence is due to a
113 113 deletion from a parent, or whether the merge commit
114 114 itself deletes the file. We decide this by doing a
115 115 simplified three way merge of the manifest entry for
116 116 the file. There are two ways we decide the merge
117 117 itself didn't delete a file:
118 118 - neither parent (nor the merge) contain the file
119 119 - exactly one parent contains the file, and that
120 120 parent has the same filelog entry as the merge
121 121 ancestor (or all of them if there are two). In other
122 122 words, that parent left the file unchanged while the
123 123 other one deleted it.
124 124 One way to think about this is that deleting a file is
125 125 similar to emptying it, so the list of changed files
126 126 should be similar either way. The computation
127 127 described above is not done directly in _filecommit
128 128 when creating the list of changed files; however,
129 129 it does something very similar by comparing filelog
130 130 nodes.
131 131 """
132 132
133 133 if x is not None:
134 134 p1, p2, m1, m2 = x
135 135 else:
136 136 p1 = ctx.p1()
137 137 p2 = ctx.p2()
138 138 m1 = p1.manifest()
139 139 m2 = p2.manifest()
140 140
141 141 @util.cachefunc
142 142 def mas():
143 143 p1n = p1.node()
144 144 p2n = p2.node()
145 145 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
146 146 if not cahs:
147 147 cahs = [node.nullrev]
148 148 return [ctx.repo()[r].manifest() for r in cahs]
149 149
150 150 def deletionfromparent(f):
151 151 if f in m1:
152 152 return f not in m2 and all(
153 153 f in ma and ma.find(f) == m1.find(f) for ma in mas()
154 154 )
155 155 elif f in m2:
156 156 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
157 157 else:
158 158 return True
159 159
160 160 return deletionfromparent
161 161
162 162
163 163 def computechangesetfilesremoved(ctx):
164 164 """return the list of files removed in a changeset
165 165 """
166 166 removed = []
167 167 for f in ctx.files():
168 168 if f not in ctx:
169 169 removed.append(f)
170 170 if removed:
171 171 rf = get_removal_filter(ctx)
172 172 removed = [r for r in removed if not rf(r)]
173 173 return removed
174 174
175 175
176 176 def computechangesetcopies(ctx):
177 177 """return the copies data for a changeset
178 178
179 179 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
180 180
181 181 Each dictionary is of the form: `{newname: oldname}`
182 182 """
183 183 p1copies = {}
184 184 p2copies = {}
185 185 p1 = ctx.p1()
186 186 p2 = ctx.p2()
187 187 narrowmatch = ctx._repo.narrowmatch()
188 188 for dst in ctx.files():
189 189 if not narrowmatch(dst) or dst not in ctx:
190 190 continue
191 191 copied = ctx[dst].renamed()
192 192 if not copied:
193 193 continue
194 194 src, srcnode = copied
195 195 if src in p1 and p1[src].filenode() == srcnode:
196 196 p1copies[dst] = src
197 197 elif src in p2 and p2[src].filenode() == srcnode:
198 198 p2copies[dst] = src
199 199 return p1copies, p2copies
200 200
201 201
202 202 def encodecopies(files, copies):
203 203 items = []
204 204 for i, dst in enumerate(files):
205 205 if dst in copies:
206 206 items.append(b'%d\0%s' % (i, copies[dst]))
207 207 if len(items) != len(copies):
208 208 raise error.ProgrammingError(
209 209 b'some copy targets missing from file list'
210 210 )
211 211 return b"\n".join(items)
212 212
213 213
214 214 def decodecopies(files, data):
215 215 try:
216 216 copies = {}
217 217 if not data:
218 218 return copies
219 219 for l in data.split(b'\n'):
220 220 strindex, src = l.split(b'\0')
221 221 i = int(strindex)
222 222 dst = files[i]
223 223 copies[dst] = src
224 224 return copies
225 225 except (ValueError, IndexError):
226 226 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
227 227 # used different syntax for the value.
228 228 return None
229 229
230 230
231 231 def encodefileindices(files, subset):
232 232 subset = set(subset)
233 233 indices = []
234 234 for i, f in enumerate(files):
235 235 if f in subset:
236 236 indices.append(b'%d' % i)
237 237 return b'\n'.join(indices)
238 238
239 239
240 240 def decodefileindices(files, data):
241 241 try:
242 242 subset = []
243 243 if not data:
244 244 return subset
245 245 for strindex in data.split(b'\n'):
246 246 i = int(strindex)
247 247 if i < 0 or i >= len(files):
248 248 return None
249 249 subset.append(files[i])
250 250 return subset
251 251 except (ValueError, IndexError):
252 252 # Perhaps someone had chosen the same key name (e.g. "added") and
253 253 # used different syntax for the value.
254 254 return None
255 255
256 256
257 def encode_copies_sidedata(files):
258 sortedfiles = sorted(files.touched)
259 sidedata = {}
260 p1copies = files.copied_from_p1
261 if p1copies:
262 p1copies = encodecopies(sortedfiles, p1copies)
263 sidedata[sidedatamod.SD_P1COPIES] = p1copies
264 p2copies = files.copied_from_p2
265 if p2copies:
266 p2copies = encodecopies(sortedfiles, p2copies)
267 sidedata[sidedatamod.SD_P2COPIES] = p2copies
268 filesadded = files.added
269 if filesadded:
270 filesadded = encodefileindices(sortedfiles, filesadded)
271 sidedata[sidedatamod.SD_FILESADDED] = filesadded
272 filesremoved = files.removed
273 if filesremoved:
274 filesremoved = encodefileindices(sortedfiles, filesremoved)
275 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
276 if not sidedata:
277 sidedata = None
278 return sidedata
279
280
257 281 def _getsidedata(srcrepo, rev):
258 282 ctx = srcrepo[rev]
259 283 filescopies = computechangesetcopies(ctx)
260 284 filesadded = computechangesetfilesadded(ctx)
261 285 filesremoved = computechangesetfilesremoved(ctx)
262 286 sidedata = {}
263 287 if any([filescopies, filesadded, filesremoved]):
264 288 sortedfiles = sorted(ctx.files())
265 289 p1copies, p2copies = filescopies
266 290 p1copies = encodecopies(sortedfiles, p1copies)
267 291 p2copies = encodecopies(sortedfiles, p2copies)
268 292 filesadded = encodefileindices(sortedfiles, filesadded)
269 293 filesremoved = encodefileindices(sortedfiles, filesremoved)
270 294 if p1copies:
271 295 sidedata[sidedatamod.SD_P1COPIES] = p1copies
272 296 if p2copies:
273 297 sidedata[sidedatamod.SD_P2COPIES] = p2copies
274 298 if filesadded:
275 299 sidedata[sidedatamod.SD_FILESADDED] = filesadded
276 300 if filesremoved:
277 301 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
278 302 return sidedata
279 303
280 304
281 305 def getsidedataadder(srcrepo, destrepo):
282 306 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
283 307 if pycompat.iswindows or not use_w:
284 308 return _get_simple_sidedata_adder(srcrepo, destrepo)
285 309 else:
286 310 return _get_worker_sidedata_adder(srcrepo, destrepo)
287 311
288 312
289 313 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
290 314 """The function used by worker precomputing sidedata
291 315
292 316 It read an input queue containing revision numbers
293 317 It write in an output queue containing (rev, <sidedata-map>)
294 318
295 319 The `None` input value is used as a stop signal.
296 320
297 321 The `tokens` semaphore is user to avoid having too many unprocessed
298 322 entries. The workers needs to acquire one token before fetching a task.
299 323 They will be released by the consumer of the produced data.
300 324 """
301 325 tokens.acquire()
302 326 rev = revs_queue.get()
303 327 while rev is not None:
304 328 data = _getsidedata(srcrepo, rev)
305 329 sidedata_queue.put((rev, data))
306 330 tokens.acquire()
307 331 rev = revs_queue.get()
308 332 # processing of `None` is completed, release the token.
309 333 tokens.release()
310 334
311 335
312 336 BUFF_PER_WORKER = 50
313 337
314 338
315 339 def _get_worker_sidedata_adder(srcrepo, destrepo):
316 340 """The parallel version of the sidedata computation
317 341
318 342 This code spawns a pool of workers that precompute a buffer of sidedata
319 343 before we actually need it"""
320 344 # avoid circular import copies -> scmutil -> worker -> copies
321 345 from . import worker
322 346
323 347 nbworkers = worker._numworkers(srcrepo.ui)
324 348
325 349 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
326 350 revsq = multiprocessing.Queue()
327 351 sidedataq = multiprocessing.Queue()
328 352
329 353 assert srcrepo.filtername is None
330 354 # queue all tasks beforehand; revision numbers are small and it makes
331 355 # synchronisation simpler
332 356 #
333 357 # Since the computation for each node can be quite expensive, the overhead
334 358 # of using a single queue is not relevant. In practice, most computations
335 359 # are fast but some are very expensive and dominate all the other smaller
336 360 # costs.
337 361 for r in srcrepo.changelog.revs():
338 362 revsq.put(r)
339 363 # queue the "no more tasks" markers
340 364 for i in range(nbworkers):
341 365 revsq.put(None)
342 366
343 367 allworkers = []
344 368 for i in range(nbworkers):
345 369 args = (srcrepo, revsq, sidedataq, tokens)
346 370 w = multiprocessing.Process(target=_sidedata_worker, args=args)
347 371 allworkers.append(w)
348 372 w.start()
349 373
350 374 # dictionary to store results for revisions higher than the one we are
351 375 # looking for. For example, if we need the sidedata map for 42 and 43 is
352 376 # received, we shelve 43 for later use.
353 377 staging = {}
354 378
355 379 def sidedata_companion(revlog, rev):
356 380 sidedata = {}
357 381 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
358 382 # Is the data previously shelved?
359 383 sidedata = staging.pop(rev, None)
360 384 if sidedata is None:
361 385 # look at the queued results until we find the one we are looking
362 386 # for (shelving the other ones)
363 387 r, sidedata = sidedataq.get()
364 388 while r != rev:
365 389 staging[r] = sidedata
366 390 r, sidedata = sidedataq.get()
367 391 tokens.release()
368 392 return False, (), sidedata
369 393
370 394 return sidedata_companion
371 395
372 396
373 397 def _get_simple_sidedata_adder(srcrepo, destrepo):
374 398 """The simple version of the sidedata computation
375 399
376 400 It just computes it in the same thread on request"""
377 401
378 402 def sidedatacompanion(revlog, rev):
379 403 sidedata = {}
380 404 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
381 405 sidedata = _getsidedata(srcrepo, rev)
382 406 return False, (), sidedata
383 407
384 408 return sidedatacompanion
385 409
386 410
387 411 def getsidedataremover(srcrepo, destrepo):
388 412 def sidedatacompanion(revlog, rev):
389 413 f = ()
390 414 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
391 415 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
392 416 f = (
393 417 sidedatamod.SD_P1COPIES,
394 418 sidedatamod.SD_P2COPIES,
395 419 sidedatamod.SD_FILESADDED,
396 420 sidedatamod.SD_FILESREMOVED,
397 421 )
398 422 return False, f, {}
399 423
400 424 return sidedatacompanion
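The encodecopies/encodefileindices helpers above store copy metadata as indices into the sorted file list, and the decoders deliberately return None on malformed payloads instead of raising. A quick roundtrip sketch, again assuming the mercurial package from this changeset is importable (file names are illustrative):

from mercurial import metadata

files = [b'a.txt', b'b.txt', b'c.txt']         # must be sorted and complete
copies = {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}

raw = metadata.encodecopies(files, copies)
# each line is '<index-into-files>\0<copy-source>'
assert raw == b'1\x00a.txt\n2\x00a.txt'
assert metadata.decodecopies(files, raw) == copies

rawidx = metadata.encodefileindices(files, {b'c.txt'})
assert rawidx == b'2'
assert metadata.decodefileindices(files, rawidx) == [b'c.txt']

# malformed data (e.g. an extras key reused with different syntax) yields
# None rather than an exception
assert metadata.decodecopies(files, b'not-an-index\x00x') is None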