py3: __repr__ needs to return str, not bytes...
Kyle Lippincott -
r44742:c443b9ba stable
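The change below decorates ``bundlepart.__repr__`` with ``encoding.strmethod`` so the method body can keep returning bytes while ``repr()`` on Python 3 receives the ``str`` it requires. A standalone approximation of the decorator's idea (not the actual mercurial.encoding implementation):

import sys

def strmethod(f):
    # On py3, repr()/str() must get str: decode the bytes the method
    # body produces. On py2, bytes is str, so pass through unchanged.
    if sys.version_info[0] >= 3:
        def wrapped(self):
            return f(self).decode('latin-1')
        return wrapped
    return f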
@@ -1,2578 +1,2579 b''
1 1 # bundle2.py - generic container format to transmit arbitrary data.
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """Handling of the new bundle2 format
8 8
9 9 The goal of bundle2 is to act as an atomic packet to transmit a set of
10 10 payloads in an application-agnostic way. It consists of a sequence of "parts"
11 11 that will be handed to and processed by the application layer.
12 12
13 13
14 14 General format architecture
15 15 ===========================
16 16
17 17 The format is structured as follows:
18 18
19 19 - magic string
20 20 - stream level parameters
21 21 - payload parts (any number)
22 22 - end of stream marker.
23 23
24 24 The binary format
25 25 ============================
26 26
27 27 All numbers are unsigned and big-endian.
28 28
29 29 stream level parameters
30 30 ------------------------
31 31
32 32 The binary format is as follows:
33 33
34 34 :params size: int32
35 35
36 36 The total number of bytes used by the parameters
37 37
38 38 :params value: arbitrary number of bytes
39 39
40 40 A blob of `params size` containing the serialized version of all stream level
41 41 parameters.
42 42
43 43 The blob contains a space separated list of parameters. Parameters with value
44 44 are stored in the form `<name>=<value>`. Both name and value are urlquoted.
45 45
46 46 Empty names are forbidden.
47 47
48 48 Names MUST start with a letter. If this first letter is lower case, the
49 49 parameter is advisory and can be safely ignored. However, when the first
50 50 letter is capital, the parameter is mandatory and the bundling process MUST
51 51 stop if it is not able to process it.
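For illustration, a minimal sketch of encoding and decoding such a
parameter blob (hypothetical helpers, not part of this module)::

    from urllib.parse import quote, unquote

    def encodeparams(params):
        # params: list of (name, value-or-None) pairs
        chunks = []
        for name, value in params:
            chunk = quote(name)
            if value is not None:
                chunk += '=' + quote(value)
            chunks.append(chunk)
        return ' '.join(chunks).encode('ascii')

    def decodeparams(blob):
        # yield (name, value) pairs; value is None for bare names
        for chunk in blob.decode('ascii').split(' '):
            name, sep, value = chunk.partition('=')
            yield unquote(name), (unquote(value) if sep else None)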
52 52
53 53 Stream parameters use a simple textual format for two main reasons:
54 54
55 55 - Stream level parameters should remain simple and we want to discourage any
56 56 crazy usage.
57 57 - Textual data allows easy human inspection of a bundle2 header in case of
58 58 troubles.
59 59
60 60 Any application level options MUST go into a bundle2 part instead.
61 61
62 62 Payload part
63 63 ------------------------
64 64
65 65 The binary format is as follows:
66 66
67 67 :header size: int32
68 68
69 69 The total number of bytes used by the part header. When the header is empty
70 70 (size = 0) this is interpreted as the end of stream marker.
71 71
72 72 :header:
73 73
74 74 The header defines how to interpret the part. It contains two pieces of
75 75 data: the part type, and the part parameters.
76 76
77 77 The part type is used to route the part to an application level handler
78 78 that can interpret the payload.
79 79
80 80 Part parameters are passed to the application level handler. They are
81 81 meant to convey information that will help the application level object to
82 82 interpret the part payload.
83 83
84 84 The binary format of the header is as follows:
85 85
86 86 :typesize: (one byte)
87 87
88 88 :parttype: alphanumerical part name (restricted to [a-zA-Z0-9_:-]*)
89 89
90 90 :partid: A 32bits integer (unique in the bundle) that can be used to refer
91 91 to this part.
92 92
93 93 :parameters:
94 94
95 95 Part's parameter may have arbitrary content, the binary structure is::
96 96
97 97 <mandatory-count><advisory-count><param-sizes><param-data>
98 98
99 99 :mandatory-count: 1 byte, number of mandatory parameters
100 100
101 101 :advisory-count: 1 byte, number of advisory parameters
102 102
103 103 :param-sizes:
104 104
105 105 N pairs of bytes, where N is the total number of parameters. Each
106 106 pair contains (<size-of-key>, <size-of-value>) for one parameter.
107 107
108 108 :param-data:
109 109
110 110 A blob of bytes from which each parameter key and value can be
111 111 retrieved using the list of size pairs stored in the previous
112 112 field.
113 113
114 114 Mandatory parameters come first, then the advisory ones.
115 115
116 116 Each parameter's key MUST be unique within the part.
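A minimal sketch of decoding this parameter block (hypothetical helper,
not part of this module)::

    import struct

    def decodepartparams(data):
        # <mandatory-count><advisory-count> come first, one byte each
        mancount, advcount = struct.unpack_from('>BB', data, 0)
        count = mancount + advcount
        # then N (size-of-key, size-of-value) byte pairs
        sizes = struct.unpack_from('>' + 'BB' * count, data, 2)
        offset = 2 + 2 * count
        params = []
        for i in range(count):
            ksize, vsize = sizes[2 * i], sizes[2 * i + 1]
            key = data[offset:offset + ksize]
            offset += ksize
            value = data[offset:offset + vsize]
            offset += vsize
            params.append((key, value))
        # mandatory parameters come first, then the advisory ones
        return params[:mancount], params[mancount:]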
117 117
118 118 :payload:
119 119
120 120 payload is a series of `<chunksize><chunkdata>`.
121 121
122 122 `chunksize` is an int32, `chunkdata` is plain bytes (as many bytes as
123 123 `chunksize` says). The payload part is concluded by a zero size chunk.
124 124
125 125 The current implementation always produces either zero or one chunk.
126 126 This is an implementation limitation that will ultimately be lifted.
127 127
128 128 `chunksize` can be negative to trigger special case processing. No such
129 129 processing is in place yet.
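A minimal sketch of reading a payload framed this way (hypothetical
helper, not part of this module)::

    import struct

    def readpayload(fp):
        # yield chunkdata blobs until the zero-size terminating chunk
        while True:
            (chunksize,) = struct.unpack('>i', fp.read(4))
            if chunksize == 0:
                return
            if chunksize < 0:
                raise ValueError('special chunk sizes not handled here')
            yield fp.read(chunksize)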
130 130
131 131 Bundle processing
132 132 ============================
133 133
134 134 Each part is processed in order using a "part handler". Handlers are
135 135 registered for a certain part type.
136 136
137 137 The matching of a part to its handler is case insensitive. The case of the
138 138 part type is used to know if a part is mandatory or advisory. If the Part type
139 139 contains any uppercase char it is considered mandatory. When no handler is
140 140 known for a Mandatory part, the process is aborted and an exception is raised.
141 141 If the part is advisory and no handler is known, the part is ignored. When the
142 142 process is aborted, the full bundle is still read from the stream to keep the
143 143 channel usable. But none of the parts read after an abort are processed. In the
144 144 future, dropping the stream may become an option for channels we do not care to
145 145 preserve.
146 146 """
147 147
148 148 from __future__ import absolute_import, division
149 149
150 150 import collections
151 151 import errno
152 152 import os
153 153 import re
154 154 import string
155 155 import struct
156 156 import sys
157 157
158 158 from .i18n import _
159 159 from . import (
160 160 bookmarks,
161 161 changegroup,
162 162 encoding,
163 163 error,
164 164 node as nodemod,
165 165 obsolete,
166 166 phases,
167 167 pushkey,
168 168 pycompat,
169 169 streamclone,
170 170 tags,
171 171 url,
172 172 util,
173 173 )
174 174 from .utils import stringutil
175 175
176 176 urlerr = util.urlerr
177 177 urlreq = util.urlreq
178 178
179 179 _pack = struct.pack
180 180 _unpack = struct.unpack
181 181
182 182 _fstreamparamsize = b'>i'
183 183 _fpartheadersize = b'>i'
184 184 _fparttypesize = b'>B'
185 185 _fpartid = b'>I'
186 186 _fpayloadsize = b'>i'
187 187 _fpartparamcount = b'>BB'
188 188
189 189 preferedchunksize = 32768
190 190
191 191 _parttypeforbidden = re.compile(b'[^a-zA-Z0-9_:-]')
192 192
193 193
194 194 def outdebug(ui, message):
195 195 """debug regarding output stream (bundling)"""
196 196 if ui.configbool(b'devel', b'bundle2.debug'):
197 197 ui.debug(b'bundle2-output: %s\n' % message)
198 198
199 199
200 200 def indebug(ui, message):
201 201 """debug on input stream (unbundling)"""
202 202 if ui.configbool(b'devel', b'bundle2.debug'):
203 203 ui.debug(b'bundle2-input: %s\n' % message)
204 204
205 205
206 206 def validateparttype(parttype):
207 207 """raise ValueError if a parttype contains invalid character"""
208 208 if _parttypeforbidden.search(parttype):
209 209 raise ValueError(parttype)
210 210
211 211
212 212 def _makefpartparamsizes(nbparams):
213 213 """return a struct format to read part parameter sizes
214 214
215 215 The number of parameters is variable so we need to build that format
216 216 dynamically.
217 217 """
218 218 return b'>' + (b'BB' * nbparams)
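For instance, with two parameters all four size bytes unpack in one call (a small illustration):

fmt = _makefpartparamsizes(2)
assert fmt == b'>BBBB'
assert struct.unpack(fmt, b'\x03\x05\x04\x00') == (3, 5, 4, 0)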
219 219
220 220
221 221 parthandlermapping = {}
222 222
223 223
224 224 def parthandler(parttype, params=()):
225 225 """decorator that register a function as a bundle2 part handler
226 226
227 227 eg::
228 228
229 229 @parthandler('myparttype', ('mandatory', 'param', 'handled'))
230 230 def myparttypehandler(...):
231 231 '''process a part of type "my part".'''
232 232 ...
233 233 """
234 234 validateparttype(parttype)
235 235
236 236 def _decorator(func):
237 237 lparttype = parttype.lower() # enforce lower case matching.
238 238 assert lparttype not in parthandlermapping
239 239 parthandlermapping[lparttype] = func
240 240 func.params = frozenset(params)
241 241 return func
242 242
243 243 return _decorator
244 244
245 245
246 246 class unbundlerecords(object):
247 247 """keep record of what happens during and unbundle
248 248
249 249 New records are added using `records.add('cat', obj)`, where 'cat' is a
250 250 category of record and obj is an arbitrary object.
251 251
252 252 `records['cat']` will return all entries of this category 'cat'.
253 253
254 254 Iterating on the object itself will yield `('category', obj)` tuples
255 255 for all entries.
256 256
257 257 All iterations happen in chronological order.
258 258 """
259 259
260 260 def __init__(self):
261 261 self._categories = {}
262 262 self._sequences = []
263 263 self._replies = {}
264 264
265 265 def add(self, category, entry, inreplyto=None):
266 266 """add a new record of a given category.
267 267
268 268 The entry can then be retrieved in the list returned by
269 269 self['category']."""
270 270 self._categories.setdefault(category, []).append(entry)
271 271 self._sequences.append((category, entry))
272 272 if inreplyto is not None:
273 273 self.getreplies(inreplyto).add(category, entry)
274 274
275 275 def getreplies(self, partid):
276 276 """get the records that are replies to a specific part"""
277 277 return self._replies.setdefault(partid, unbundlerecords())
278 278
279 279 def __getitem__(self, cat):
280 280 return tuple(self._categories.get(cat, ()))
281 281
282 282 def __iter__(self):
283 283 return iter(self._sequences)
284 284
285 285 def __len__(self):
286 286 return len(self._sequences)
287 287
288 288 def __nonzero__(self):
289 289 return bool(self._sequences)
290 290
291 291 __bool__ = __nonzero__
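A brief usage sketch of the records container (illustrative values):

records = unbundlerecords()
records.add(b'changegroup', {b'return': 1})
records.add(b'output', b'some text')
assert records[b'changegroup'] == ({b'return': 1},)
assert list(records) == [
    (b'changegroup', {b'return': 1}),
    (b'output', b'some text'),
]
assert len(records) == 2 and bool(records)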
292 292
293 293
294 294 class bundleoperation(object):
295 295 """an object that represents a single bundling process
296 296
297 297 Its purpose is to carry unbundle-related objects and states.
298 298
299 299 A new object should be created at the beginning of each bundle processing.
300 300 The object is to be returned by the processing function.
301 301
302 302 The object has very little content now; it will ultimately contain:
303 303 * an access to the repo the bundle is applied to,
304 304 * a ui object,
305 305 * a way to retrieve a transaction to add changes to the repo,
306 306 * a way to record the result of processing each part,
307 307 * a way to construct a bundle response when applicable.
308 308 """
309 309
310 310 def __init__(self, repo, transactiongetter, captureoutput=True, source=b''):
311 311 self.repo = repo
312 312 self.ui = repo.ui
313 313 self.records = unbundlerecords()
314 314 self.reply = None
315 315 self.captureoutput = captureoutput
316 316 self.hookargs = {}
317 317 self._gettransaction = transactiongetter
318 318 # carries value that can modify part behavior
319 319 self.modes = {}
320 320 self.source = source
321 321
322 322 def gettransaction(self):
323 323 transaction = self._gettransaction()
324 324
325 325 if self.hookargs:
326 326 # the ones added to the transaction supersede those added
327 327 # to the operation.
328 328 self.hookargs.update(transaction.hookargs)
329 329 transaction.hookargs = self.hookargs
330 330
331 331 # mark the hookargs as flushed. further attempts to add to
332 332 # hookargs will result in an abort.
333 333 self.hookargs = None
334 334
335 335 return transaction
336 336
337 337 def addhookargs(self, hookargs):
338 338 if self.hookargs is None:
339 339 raise error.ProgrammingError(
340 340 b'attempted to add hookargs to '
341 341 b'operation after transaction started'
342 342 )
343 343 self.hookargs.update(hookargs)
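The ordering constraint above matters: once ``gettransaction`` has flushed the hookargs into the transaction, further additions abort. A usage sketch (``repo`` and ``tr`` assumed to exist):

# op = bundleoperation(repo, lambda: tr)
# op.addhookargs({b'source': b'push'})  # fine: transaction not started yet
# op.gettransaction()                   # flushes hookargs into tr.hookargs
# op.addhookargs({b'url': b'...'})      # raises error.ProgrammingError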
344 344
345 345
346 346 class TransactionUnavailable(RuntimeError):
347 347 pass
348 348
349 349
350 350 def _notransaction():
351 351 """default method to get a transaction while processing a bundle
352 352
353 353 Raise an exception to highlight the fact that no transaction was expected
354 354 to be created"""
355 355 raise TransactionUnavailable()
356 356
357 357
358 358 def applybundle(repo, unbundler, tr, source, url=None, **kwargs):
359 359 # transform me into unbundler.apply() as soon as the freeze is lifted
360 360 if isinstance(unbundler, unbundle20):
361 361 tr.hookargs[b'bundle2'] = b'1'
362 362 if source is not None and b'source' not in tr.hookargs:
363 363 tr.hookargs[b'source'] = source
364 364 if url is not None and b'url' not in tr.hookargs:
365 365 tr.hookargs[b'url'] = url
366 366 return processbundle(repo, unbundler, lambda: tr, source=source)
367 367 else:
368 368 # the transactiongetter won't be used, but we might as well set it
369 369 op = bundleoperation(repo, lambda: tr, source=source)
370 370 _processchangegroup(op, unbundler, tr, source, url, **kwargs)
371 371 return op
372 372
373 373
374 374 class partiterator(object):
375 375 def __init__(self, repo, op, unbundler):
376 376 self.repo = repo
377 377 self.op = op
378 378 self.unbundler = unbundler
379 379 self.iterator = None
380 380 self.count = 0
381 381 self.current = None
382 382
383 383 def __enter__(self):
384 384 def func():
385 385 itr = enumerate(self.unbundler.iterparts(), 1)
386 386 for count, p in itr:
387 387 self.count = count
388 388 self.current = p
389 389 yield p
390 390 p.consume()
391 391 self.current = None
392 392
393 393 self.iterator = func()
394 394 return self.iterator
395 395
396 396 def __exit__(self, type, exc, tb):
397 397 if not self.iterator:
398 398 return
399 399
400 400 # Only gracefully abort in a normal exception situation. User aborts
401 401 # like Ctrl+C throw a KeyboardInterrupt, which does not derive from
402 402 # Exception and should not be cleaned up gracefully.
403 403 if isinstance(exc, Exception):
404 404 # Any exceptions seeking to the end of the bundle at this point are
405 405 # almost certainly related to the underlying stream being bad.
406 406 # And, chances are that the exception we're handling is related to
407 407 # getting in that bad state. So, we swallow the seeking error and
408 408 # re-raise the original error.
409 409 seekerror = False
410 410 try:
411 411 if self.current:
412 412 # consume the part content to not corrupt the stream.
413 413 self.current.consume()
414 414
415 415 for part in self.iterator:
416 416 # consume the bundle content
417 417 part.consume()
418 418 except Exception:
419 419 seekerror = True
420 420
421 421 # Small hack to let caller code distinguish exceptions from bundle2
422 422 # processing from processing the old format. This is mostly needed
423 423 # to handle different return codes to unbundle according to the type
424 424 # of bundle. We should probably clean up or drop this return code
425 425 # craziness in a future version.
426 426 exc.duringunbundle2 = True
427 427 salvaged = []
428 428 replycaps = None
429 429 if self.op.reply is not None:
430 430 salvaged = self.op.reply.salvageoutput()
431 431 replycaps = self.op.reply.capabilities
432 432 exc._replycaps = replycaps
433 433 exc._bundle2salvagedoutput = salvaged
434 434
435 435 # Re-raising from a variable loses the original stack. So only use
436 436 # that form if we need to.
437 437 if seekerror:
438 438 raise exc
439 439
440 440 self.repo.ui.debug(
441 441 b'bundle2-input-bundle: %i parts total\n' % self.count
442 442 )
443 443
444 444
445 445 def processbundle(repo, unbundler, transactiongetter=None, op=None, source=b''):
446 446 """This function process a bundle, apply effect to/from a repo
447 447
448 448 It iterates over each part then searches for and uses the proper handling
449 449 code to process the part. Parts are processed in order.
450 450
451 451 An unknown Mandatory part will abort the process.
452 452
453 453 It is temporarily possible to provide a prebuilt bundleoperation to the
454 454 function. This is used to ensure output is properly propagated in case of
455 455 an error during the unbundling. This output capturing part will likely be
456 456 reworked and this ability will probably go away in the process.
457 457 """
458 458 if op is None:
459 459 if transactiongetter is None:
460 460 transactiongetter = _notransaction
461 461 op = bundleoperation(repo, transactiongetter, source=source)
462 462 # todo:
463 463 # - replace this with an init function soon.
464 464 # - exception catching
465 465 unbundler.params
466 466 if repo.ui.debugflag:
467 467 msg = [b'bundle2-input-bundle:']
468 468 if unbundler.params:
469 469 msg.append(b' %i params' % len(unbundler.params))
470 470 if op._gettransaction is None or op._gettransaction is _notransaction:
471 471 msg.append(b' no-transaction')
472 472 else:
473 473 msg.append(b' with-transaction')
474 474 msg.append(b'\n')
475 475 repo.ui.debug(b''.join(msg))
476 476
477 477 processparts(repo, op, unbundler)
478 478
479 479 return op
480 480
481 481
482 482 def processparts(repo, op, unbundler):
483 483 with partiterator(repo, op, unbundler) as parts:
484 484 for part in parts:
485 485 _processpart(op, part)
486 486
487 487
488 488 def _processchangegroup(op, cg, tr, source, url, **kwargs):
489 489 ret = cg.apply(op.repo, tr, source, url, **kwargs)
490 490 op.records.add(b'changegroup', {b'return': ret,})
491 491 return ret
492 492
493 493
494 494 def _gethandler(op, part):
495 495 status = b'unknown' # used by debug output
496 496 try:
497 497 handler = parthandlermapping.get(part.type)
498 498 if handler is None:
499 499 status = b'unsupported-type'
500 500 raise error.BundleUnknownFeatureError(parttype=part.type)
501 501 indebug(op.ui, b'found a handler for part %s' % part.type)
502 502 unknownparams = part.mandatorykeys - handler.params
503 503 if unknownparams:
504 504 unknownparams = list(unknownparams)
505 505 unknownparams.sort()
506 506 status = b'unsupported-params (%s)' % b', '.join(unknownparams)
507 507 raise error.BundleUnknownFeatureError(
508 508 parttype=part.type, params=unknownparams
509 509 )
510 510 status = b'supported'
511 511 except error.BundleUnknownFeatureError as exc:
512 512 if part.mandatory: # mandatory parts
513 513 raise
514 514 indebug(op.ui, b'ignoring unsupported advisory part %s' % exc)
515 515 return # skip to part processing
516 516 finally:
517 517 if op.ui.debugflag:
518 518 msg = [b'bundle2-input-part: "%s"' % part.type]
519 519 if not part.mandatory:
520 520 msg.append(b' (advisory)')
521 521 nbmp = len(part.mandatorykeys)
522 522 nbap = len(part.params) - nbmp
523 523 if nbmp or nbap:
524 524 msg.append(b' (params:')
525 525 if nbmp:
526 526 msg.append(b' %i mandatory' % nbmp)
527 527 if nbap:
528 528 msg.append(b' %i advisory' % nbap)
529 529 msg.append(b')')
530 530 msg.append(b' %s\n' % status)
531 531 op.ui.debug(b''.join(msg))
532 532
533 533 return handler
534 534
535 535
536 536 def _processpart(op, part):
537 537 """process a single part from a bundle
538 538
539 539 The part is guaranteed to have been fully consumed when the function exits
540 540 (even if an exception is raised)."""
541 541 handler = _gethandler(op, part)
542 542 if handler is None:
543 543 return
544 544
545 545 # handler is called outside the above try block so that we don't
546 546 # risk catching KeyErrors from anything other than the
547 547 # parthandlermapping lookup (any KeyError raised by handler()
548 548 # itself represents a defect of a different variety).
549 549 output = None
550 550 if op.captureoutput and op.reply is not None:
551 551 op.ui.pushbuffer(error=True, subproc=True)
552 552 output = b''
553 553 try:
554 554 handler(op, part)
555 555 finally:
556 556 if output is not None:
557 557 output = op.ui.popbuffer()
558 558 if output:
559 559 outpart = op.reply.newpart(b'output', data=output, mandatory=False)
560 560 outpart.addparam(
561 561 b'in-reply-to', pycompat.bytestr(part.id), mandatory=False
562 562 )
563 563
564 564
565 565 def decodecaps(blob):
566 566 """decode a bundle2 caps bytes blob into a dictionary
567 567
568 568 The blob is a list of capabilities (one per line)
569 569 Capabilities may have values using a line of the form::
570 570
571 571 capability=value1,value2,value3
572 572
573 573 The values are always a list."""
574 574 caps = {}
575 575 for line in blob.splitlines():
576 576 if not line:
577 577 continue
578 578 if b'=' not in line:
579 579 key, vals = line, ()
580 580 else:
581 581 key, vals = line.split(b'=', 1)
582 582 vals = vals.split(b',')
583 583 key = urlreq.unquote(key)
584 584 vals = [urlreq.unquote(v) for v in vals]
585 585 caps[key] = vals
586 586 return caps
587 587
588 588
589 589 def encodecaps(caps):
590 590 """encode a bundle2 caps dictionary into a bytes blob"""
591 591 chunks = []
592 592 for ca in sorted(caps):
593 593 vals = caps[ca]
594 594 ca = urlreq.quote(ca)
595 595 vals = [urlreq.quote(v) for v in vals]
596 596 if vals:
597 597 ca = b"%s=%s" % (ca, b','.join(vals))
598 598 chunks.append(ca)
599 599 return b'\n'.join(chunks)
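A round-trip sketch of the two helpers above (illustrative values):

caps = {b'HG20': [], b'digests': [b'md5', b'sha1']}
blob = encodecaps(caps)
# blob == b'HG20\ndigests=md5,sha1'
assert decodecaps(blob) == caps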
600 600
601 601
602 602 bundletypes = {
603 603 b"": (b"", b'UN'), # only when using unbundle on ssh and old http servers
604 604 # since the unification ssh accepts a header but there
605 605 # is no capability signaling it.
606 606 b"HG20": (), # special-cased below
607 607 b"HG10UN": (b"HG10UN", b'UN'),
608 608 b"HG10BZ": (b"HG10", b'BZ'),
609 609 b"HG10GZ": (b"HG10GZ", b'GZ'),
610 610 }
611 611
612 612 # hgweb uses this list to communicate its preferred type
613 613 bundlepriority = [b'HG10GZ', b'HG10BZ', b'HG10UN']
614 614
615 615
616 616 class bundle20(object):
617 617 """represent an outgoing bundle2 container
618 618
619 619 Use the `addparam` method to add a stream level parameter and `newpart` to
620 620 populate it. Then call `getchunks` to retrieve all the binary chunks of
621 621 data that compose the bundle2 container."""
622 622
623 623 _magicstring = b'HG20'
624 624
625 625 def __init__(self, ui, capabilities=()):
626 626 self.ui = ui
627 627 self._params = []
628 628 self._parts = []
629 629 self.capabilities = dict(capabilities)
630 630 self._compengine = util.compengines.forbundletype(b'UN')
631 631 self._compopts = None
632 632 # If compression is being handled by a consumer of the raw
633 633 # data (e.g. the wire protocol), unsetting this flag tells
634 634 # consumers that the bundle is best left uncompressed.
635 635 self.prefercompressed = True
636 636
637 637 def setcompression(self, alg, compopts=None):
638 638 """setup core part compression to <alg>"""
639 639 if alg in (None, b'UN'):
640 640 return
641 641 assert not any(n.lower() == b'compression' for n, v in self._params)
642 642 self.addparam(b'Compression', alg)
643 643 self._compengine = util.compengines.forbundletype(alg)
644 644 self._compopts = compopts
645 645
646 646 @property
647 647 def nbparts(self):
648 648 """total number of parts added to the bundler"""
649 649 return len(self._parts)
650 650
651 651 # methods used to define the bundle2 content
652 652 def addparam(self, name, value=None):
653 653 """add a stream level parameter"""
654 654 if not name:
655 655 raise error.ProgrammingError(b'empty parameter name')
656 656 if name[0:1] not in pycompat.bytestr(
657 657 string.ascii_letters # pytype: disable=wrong-arg-types
658 658 ):
659 659 raise error.ProgrammingError(
660 660 b'non letter first character: %s' % name
661 661 )
662 662 self._params.append((name, value))
663 663
664 664 def addpart(self, part):
665 665 """add a new part to the bundle2 container
666 666
667 667 Parts contain the actual application payload.
668 668 assert part.id is None
669 669 part.id = len(self._parts) # very cheap counter
670 670 self._parts.append(part)
671 671
672 672 def newpart(self, typeid, *args, **kwargs):
673 673 """create a new part and add it to the containers
674 674
675 675 The part is directly added to the container. For now, this means
676 676 that any failure to properly initialize the part after calling
677 677 ``newpart`` will result in a failure of the whole bundling process.
678 678
679 679 You can still fall back to manually creating and adding the part if you
680 680 need better control."""
681 681 part = bundlepart(typeid, *args, **kwargs)
682 682 self.addpart(part)
683 683 return part
684 684
685 685 # methods used to generate the bundle2 stream
686 686 def getchunks(self):
687 687 if self.ui.debugflag:
688 688 msg = [b'bundle2-output-bundle: "%s",' % self._magicstring]
689 689 if self._params:
690 690 msg.append(b' (%i params)' % len(self._params))
691 691 msg.append(b' %i parts total\n' % len(self._parts))
692 692 self.ui.debug(b''.join(msg))
693 693 outdebug(self.ui, b'start emission of %s stream' % self._magicstring)
694 694 yield self._magicstring
695 695 param = self._paramchunk()
696 696 outdebug(self.ui, b'bundle parameter: %s' % param)
697 697 yield _pack(_fstreamparamsize, len(param))
698 698 if param:
699 699 yield param
700 700 for chunk in self._compengine.compressstream(
701 701 self._getcorechunk(), self._compopts
702 702 ):
703 703 yield chunk
704 704
705 705 def _paramchunk(self):
706 706 """return a encoded version of all stream parameters"""
707 707 blocks = []
708 708 for par, value in self._params:
709 709 par = urlreq.quote(par)
710 710 if value is not None:
711 711 value = urlreq.quote(value)
712 712 par = b'%s=%s' % (par, value)
713 713 blocks.append(par)
714 714 return b' '.join(blocks)
715 715
716 716 def _getcorechunk(self):
717 717 """yield chunk for the core part of the bundle
718 718
719 719 (all but headers and parameters)"""
720 720 outdebug(self.ui, b'start of parts')
721 721 for part in self._parts:
722 722 outdebug(self.ui, b'bundle part: "%s"' % part.type)
723 723 for chunk in part.getchunks(ui=self.ui):
724 724 yield chunk
725 725 outdebug(self.ui, b'end of bundle')
726 726 yield _pack(_fpartheadersize, 0)
727 727
728 728 def salvageoutput(self):
729 729 """return a list with a copy of all output parts in the bundle
730 730
731 731 This is meant to be used during error handling to make sure we preserve
732 732 server output"""
733 733 salvaged = []
734 734 for part in self._parts:
735 735 if part.type.startswith(b'output'):
736 736 salvaged.append(part.copy())
737 737 return salvaged
738 738
739 739
740 740 class unpackermixin(object):
741 741 """A mixin to extract bytes and struct data from a stream"""
742 742
743 743 def __init__(self, fp):
744 744 self._fp = fp
745 745
746 746 def _unpack(self, format):
747 747 """unpack this struct format from the stream
748 748
749 749 This method is meant for internal usage by the bundle2 protocol only.
750 750 It directly manipulates the low level stream, including bundle2 level
751 751 instructions.
752 752
753 753 Do not use it to implement higher-level logic or methods."""
754 754 data = self._readexact(struct.calcsize(format))
755 755 return _unpack(format, data)
756 756
757 757 def _readexact(self, size):
758 758 """read exactly <size> bytes from the stream
759 759
760 760 This method is meant for internal usage by the bundle2 protocol only.
761 761 It directly manipulates the low level stream, including bundle2 level
762 762 instructions.
763 763
764 764 Do not use it to implement higher-level logic or methods."""
765 765 return changegroup.readexactly(self._fp, size)
766 766
767 767
768 768 def getunbundler(ui, fp, magicstring=None):
769 769 """return a valid unbundler object for a given magicstring"""
770 770 if magicstring is None:
771 771 magicstring = changegroup.readexactly(fp, 4)
772 772 magic, version = magicstring[0:2], magicstring[2:4]
773 773 if magic != b'HG':
774 774 ui.debug(
775 775 b"error: invalid magic: %r (version %r), should be 'HG'\n"
776 776 % (magic, version)
777 777 )
778 778 raise error.Abort(_(b'not a Mercurial bundle'))
779 779 unbundlerclass = formatmap.get(version)
780 780 if unbundlerclass is None:
781 781 raise error.Abort(_(b'unknown bundle version %s') % version)
782 782 unbundler = unbundlerclass(ui, fp)
783 783 indebug(ui, b'start processing of %s stream' % magicstring)
784 784 return unbundler
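A usage sketch for the factory above (``ui`` and an open bundle file assumed):

# with open(bundlepath, 'rb') as fp:
#     unbundler = getunbundler(ui, fp)  # reads and validates b'HG20'
#     for part in unbundler.iterparts():
#         ui.write(b'%s\n' % part.type)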
785 785
786 786
787 787 class unbundle20(unpackermixin):
788 788 """interpret a bundle2 stream
789 789
790 790 This class is fed with a binary stream and yields parts through its
791 791 `iterparts` method."""
792 792
793 793 _magicstring = b'HG20'
794 794
795 795 def __init__(self, ui, fp):
796 796 """If header is specified, we do not read it out of the stream."""
797 797 self.ui = ui
798 798 self._compengine = util.compengines.forbundletype(b'UN')
799 799 self._compressed = None
800 800 super(unbundle20, self).__init__(fp)
801 801
802 802 @util.propertycache
803 803 def params(self):
804 804 """dictionary of stream level parameters"""
805 805 indebug(self.ui, b'reading bundle2 stream parameters')
806 806 params = {}
807 807 paramssize = self._unpack(_fstreamparamsize)[0]
808 808 if paramssize < 0:
809 809 raise error.BundleValueError(
810 810 b'negative bundle param size: %i' % paramssize
811 811 )
812 812 if paramssize:
813 813 params = self._readexact(paramssize)
814 814 params = self._processallparams(params)
815 815 return params
816 816
817 817 def _processallparams(self, paramsblock):
818 818 """"""
819 819 params = util.sortdict()
820 820 for p in paramsblock.split(b' '):
821 821 p = p.split(b'=', 1)
822 822 p = [urlreq.unquote(i) for i in p]
823 823 if len(p) < 2:
824 824 p.append(None)
825 825 self._processparam(*p)
826 826 params[p[0]] = p[1]
827 827 return params
828 828
829 829 def _processparam(self, name, value):
830 830 """process a parameter, applying its effect if needed
831 831
832 832 Parameters starting with a lower case letter are advisory and will be
833 833 ignored when unknown. Those starting with an upper case letter are
834 834 mandatory, and this function will raise a KeyError when they are unknown.
835 835
836 836 Note: no options are currently supported. Any input will either be
837 837 ignored or fail.
838 838 """
839 839 if not name:
840 840 raise ValueError('empty parameter name')
841 841 if name[0:1] not in pycompat.bytestr(
842 842 string.ascii_letters # pytype: disable=wrong-arg-types
843 843 ):
844 844 raise ValueError('non letter first character: %s' % name)
845 845 try:
846 846 handler = b2streamparamsmap[name.lower()]
847 847 except KeyError:
848 848 if name[0:1].islower():
849 849 indebug(self.ui, b"ignoring unknown parameter %s" % name)
850 850 else:
851 851 raise error.BundleUnknownFeatureError(params=(name,))
852 852 else:
853 853 handler(self, name, value)
854 854
855 855 def _forwardchunks(self):
856 856 """utility to transfer a bundle2 as binary
857 857
858 858 This is made necessary by the fact that the 'getbundle' command over 'ssh'
859 859 has no way to know when the reply ends, relying on the bundle being
860 860 interpreted to find its end. This is terrible and we are sorry, but we
861 861 needed to move forward to get general delta enabled.
862 862 """
863 863 yield self._magicstring
864 864 assert 'params' not in vars(self)
865 865 paramssize = self._unpack(_fstreamparamsize)[0]
866 866 if paramssize < 0:
867 867 raise error.BundleValueError(
868 868 b'negative bundle param size: %i' % paramssize
869 869 )
870 870 if paramssize:
871 871 params = self._readexact(paramssize)
872 872 self._processallparams(params)
873 873 # The payload itself is decompressed below, so drop
874 874 # the compression parameter passed down to compensate.
875 875 outparams = []
876 876 for p in params.split(b' '):
877 877 k, v = p.split(b'=', 1)
878 878 if k.lower() != b'compression':
879 879 outparams.append(p)
880 880 outparams = b' '.join(outparams)
881 881 yield _pack(_fstreamparamsize, len(outparams))
882 882 yield outparams
883 883 else:
884 884 yield _pack(_fstreamparamsize, paramssize)
885 885 # From there, the payload might need to be decompressed
886 886 self._fp = self._compengine.decompressorreader(self._fp)
887 887 emptycount = 0
888 888 while emptycount < 2:
889 889 # so we can brainlessly loop
890 890 assert _fpartheadersize == _fpayloadsize
891 891 size = self._unpack(_fpartheadersize)[0]
892 892 yield _pack(_fpartheadersize, size)
893 893 if size:
894 894 emptycount = 0
895 895 else:
896 896 emptycount += 1
897 897 continue
898 898 if size == flaginterrupt:
899 899 continue
900 900 elif size < 0:
901 901 raise error.BundleValueError(b'negative chunk size: %i' % size)
902 902 yield self._readexact(size)
903 903
904 904 def iterparts(self, seekable=False):
905 905 """yield all parts contained in the stream"""
906 906 cls = seekableunbundlepart if seekable else unbundlepart
907 907 # make sure params have been loaded
908 908 self.params
909 909 # From there, the payload needs to be decompressed
910 910 self._fp = self._compengine.decompressorreader(self._fp)
911 911 indebug(self.ui, b'start extraction of bundle2 parts')
912 912 headerblock = self._readpartheader()
913 913 while headerblock is not None:
914 914 part = cls(self.ui, headerblock, self._fp)
915 915 yield part
916 916 # Ensure part is fully consumed so we can start reading the next
917 917 # part.
918 918 part.consume()
919 919
920 920 headerblock = self._readpartheader()
921 921 indebug(self.ui, b'end of bundle2 stream')
922 922
923 923 def _readpartheader(self):
924 924 """reads a part header size and return the bytes blob
925 925
926 926 returns None if empty"""
927 927 headersize = self._unpack(_fpartheadersize)[0]
928 928 if headersize < 0:
929 929 raise error.BundleValueError(
930 930 b'negative part header size: %i' % headersize
931 931 )
932 932 indebug(self.ui, b'part header size: %i' % headersize)
933 933 if headersize:
934 934 return self._readexact(headersize)
935 935 return None
936 936
937 937 def compressed(self):
938 938 self.params # load params
939 939 return self._compressed
940 940
941 941 def close(self):
942 942 """close underlying file"""
943 943 if util.safehasattr(self._fp, 'close'):
944 944 return self._fp.close()
945 945
946 946
947 947 formatmap = {b'20': unbundle20}
948 948
949 949 b2streamparamsmap = {}
950 950
951 951
952 952 def b2streamparamhandler(name):
953 953 """register a handler for a stream level parameter"""
954 954
955 955 def decorator(func):
956 956 assert name not in formatmap
957 957 b2streamparamsmap[name] = func
958 958 return func
959 959
960 960 return decorator
961 961
962 962
963 963 @b2streamparamhandler(b'compression')
964 964 def processcompression(unbundler, param, value):
965 965 """read compression parameter and install payload decompression"""
966 966 if value not in util.compengines.supportedbundletypes:
967 967 raise error.BundleUnknownFeatureError(params=(param,), values=(value,))
968 968 unbundler._compengine = util.compengines.forbundletype(value)
969 969 if value is not None:
970 970 unbundler._compressed = True
971 971
972 972
973 973 class bundlepart(object):
974 974 """A bundle2 part contains application level payload
975 975
976 976 The part `type` is used to route the part to the application level
977 977 handler.
978 978
979 979 The part payload is contained in ``part.data``. It could be raw bytes or a
980 980 generator of byte chunks.
981 981
982 982 You can add parameters to the part using the ``addparam`` method.
983 983 Parameters can be either mandatory (default) or advisory. Remote side
984 984 should be able to safely ignore the advisory ones.
985 985
986 986 Both data and parameters cannot be modified after the generation has begun.
987 987 """
988 988
989 989 def __init__(
990 990 self,
991 991 parttype,
992 992 mandatoryparams=(),
993 993 advisoryparams=(),
994 994 data=b'',
995 995 mandatory=True,
996 996 ):
997 997 validateparttype(parttype)
998 998 self.id = None
999 999 self.type = parttype
1000 1000 self._data = data
1001 1001 self._mandatoryparams = list(mandatoryparams)
1002 1002 self._advisoryparams = list(advisoryparams)
1003 1003 # checking for duplicated entries
1004 1004 self._seenparams = set()
1005 1005 for pname, __ in self._mandatoryparams + self._advisoryparams:
1006 1006 if pname in self._seenparams:
1007 1007 raise error.ProgrammingError(b'duplicated params: %s' % pname)
1008 1008 self._seenparams.add(pname)
1009 1009 # status of the part's generation:
1010 1010 # - None: not started,
1011 1011 # - False: currently generated,
1012 1012 # - True: generation done.
1013 1013 self._generated = None
1014 1014 self.mandatory = mandatory
1015 1015
1016 @encoding.strmethod
1016 1017 def __repr__(self):
1017 1018 cls = b"%s.%s" % (self.__class__.__module__, self.__class__.__name__)
1018 1019 return b'<%s object at %x; id: %s; type: %s; mandatory: %s>' % (
1019 1020 cls,
1020 1021 id(self),
1021 1022 self.id,
1022 1023 self.type,
1023 1024 self.mandatory,
1024 1025 )
1025 1026
1026 1027 def copy(self):
1027 1028 """return a copy of the part
1028 1029
1029 1030 The new part has the very same content but no partid assigned yet.
1030 1031 Parts with generated data cannot be copied."""
1031 1032 assert not util.safehasattr(self.data, 'next')
1032 1033 return self.__class__(
1033 1034 self.type,
1034 1035 self._mandatoryparams,
1035 1036 self._advisoryparams,
1036 1037 self._data,
1037 1038 self.mandatory,
1038 1039 )
1039 1040
1040 1041 # methods used to define the part content
1041 1042 @property
1042 1043 def data(self):
1043 1044 return self._data
1044 1045
1045 1046 @data.setter
1046 1047 def data(self, data):
1047 1048 if self._generated is not None:
1048 1049 raise error.ReadOnlyPartError(b'part is being generated')
1049 1050 self._data = data
1050 1051
1051 1052 @property
1052 1053 def mandatoryparams(self):
1053 1054 # make it an immutable tuple to force people through ``addparam``
1054 1055 return tuple(self._mandatoryparams)
1055 1056
1056 1057 @property
1057 1058 def advisoryparams(self):
1058 1059 # make it an immutable tuple to force people through ``addparam``
1059 1060 return tuple(self._advisoryparams)
1060 1061
1061 1062 def addparam(self, name, value=b'', mandatory=True):
1062 1063 """add a parameter to the part
1063 1064
1064 1065 If 'mandatory' is set to True, the remote handler must claim support
1065 1066 for this parameter or the unbundling will be aborted.
1066 1067
1067 1068 The 'name' and 'value' cannot exceed 255 bytes each.
1068 1069 """
1069 1070 if self._generated is not None:
1070 1071 raise error.ReadOnlyPartError(b'part is being generated')
1071 1072 if name in self._seenparams:
1072 1073 raise ValueError(b'duplicated params: %s' % name)
1073 1074 self._seenparams.add(name)
1074 1075 params = self._advisoryparams
1075 1076 if mandatory:
1076 1077 params = self._mandatoryparams
1077 1078 params.append((name, value))
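A brief sketch of building a part with parameters (illustrative values):

part = bundlepart(b'output', data=b'hello')
part.addparam(b'in-reply-to', b'0', mandatory=False)
part.addparam(b'encoding', b'utf-8')  # mandatory by default
assert part.mandatoryparams == ((b'encoding', b'utf-8'),)
assert part.advisoryparams == ((b'in-reply-to', b'0'),)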
1078 1079
1079 1080 # methods used to generate the bundle2 stream
1080 1081 def getchunks(self, ui):
1081 1082 if self._generated is not None:
1082 1083 raise error.ProgrammingError(b'part can only be consumed once')
1083 1084 self._generated = False
1084 1085
1085 1086 if ui.debugflag:
1086 1087 msg = [b'bundle2-output-part: "%s"' % self.type]
1087 1088 if not self.mandatory:
1088 1089 msg.append(b' (advisory)')
1089 1090 nbmp = len(self.mandatoryparams)
1090 1091 nbap = len(self.advisoryparams)
1091 1092 if nbmp or nbap:
1092 1093 msg.append(b' (params:')
1093 1094 if nbmp:
1094 1095 msg.append(b' %i mandatory' % nbmp)
1095 1096 if nbap:
1096 1097 msg.append(b' %i advisory' % nbap)
1097 1098 msg.append(b')')
1098 1099 if not self.data:
1099 1100 msg.append(b' empty payload')
1100 1101 elif util.safehasattr(self.data, 'next') or util.safehasattr(
1101 1102 self.data, b'__next__'
1102 1103 ):
1103 1104 msg.append(b' streamed payload')
1104 1105 else:
1105 1106 msg.append(b' %i bytes payload' % len(self.data))
1106 1107 msg.append(b'\n')
1107 1108 ui.debug(b''.join(msg))
1108 1109
1109 1110 #### header
1110 1111 if self.mandatory:
1111 1112 parttype = self.type.upper()
1112 1113 else:
1113 1114 parttype = self.type.lower()
1114 1115 outdebug(ui, b'part %s: "%s"' % (pycompat.bytestr(self.id), parttype))
1115 1116 ## parttype
1116 1117 header = [
1117 1118 _pack(_fparttypesize, len(parttype)),
1118 1119 parttype,
1119 1120 _pack(_fpartid, self.id),
1120 1121 ]
1121 1122 ## parameters
1122 1123 # count
1123 1124 manpar = self.mandatoryparams
1124 1125 advpar = self.advisoryparams
1125 1126 header.append(_pack(_fpartparamcount, len(manpar), len(advpar)))
1126 1127 # size
1127 1128 parsizes = []
1128 1129 for key, value in manpar:
1129 1130 parsizes.append(len(key))
1130 1131 parsizes.append(len(value))
1131 1132 for key, value in advpar:
1132 1133 parsizes.append(len(key))
1133 1134 parsizes.append(len(value))
1134 1135 paramsizes = _pack(_makefpartparamsizes(len(parsizes) // 2), *parsizes)
1135 1136 header.append(paramsizes)
1136 1137 # key, value
1137 1138 for key, value in manpar:
1138 1139 header.append(key)
1139 1140 header.append(value)
1140 1141 for key, value in advpar:
1141 1142 header.append(key)
1142 1143 header.append(value)
1143 1144 ## finalize header
1144 1145 try:
1145 1146 headerchunk = b''.join(header)
1146 1147 except TypeError:
1147 1148 raise TypeError(
1148 1149 'Found a non-bytes trying to '
1149 1150 'build bundle part header: %r' % header
1150 1151 )
1151 1152 outdebug(ui, b'header chunk size: %i' % len(headerchunk))
1152 1153 yield _pack(_fpartheadersize, len(headerchunk))
1153 1154 yield headerchunk
1154 1155 ## payload
1155 1156 try:
1156 1157 for chunk in self._payloadchunks():
1157 1158 outdebug(ui, b'payload chunk size: %i' % len(chunk))
1158 1159 yield _pack(_fpayloadsize, len(chunk))
1159 1160 yield chunk
1160 1161 except GeneratorExit:
1161 1162 # GeneratorExit means that nobody is listening for our
1162 1163 # results anyway, so just bail quickly rather than trying
1163 1164 # to produce an error part.
1164 1165 ui.debug(b'bundle2-generatorexit\n')
1165 1166 raise
1166 1167 except BaseException as exc:
1167 1168 bexc = stringutil.forcebytestr(exc)
1168 1169 # backup exception data for later
1169 1170 ui.debug(
1170 1171 b'bundle2-input-stream-interrupt: encoding exception %s' % bexc
1171 1172 )
1172 1173 tb = sys.exc_info()[2]
1173 1174 msg = b'unexpected error: %s' % bexc
1174 1175 interpart = bundlepart(
1175 1176 b'error:abort', [(b'message', msg)], mandatory=False
1176 1177 )
1177 1178 interpart.id = 0
1178 1179 yield _pack(_fpayloadsize, -1)
1179 1180 for chunk in interpart.getchunks(ui=ui):
1180 1181 yield chunk
1181 1182 outdebug(ui, b'closing payload chunk')
1182 1183 # abort current part payload
1183 1184 yield _pack(_fpayloadsize, 0)
1184 1185 pycompat.raisewithtb(exc, tb)
1185 1186 # end of payload
1186 1187 outdebug(ui, b'closing payload chunk')
1187 1188 yield _pack(_fpayloadsize, 0)
1188 1189 self._generated = True
1189 1190
1190 1191 def _payloadchunks(self):
1191 1192 """yield chunks of a the part payload
1192 1193
1193 1194 Exists to handle the different methods to provide data to a part."""
1194 1195 # we only support fixed size data now.
1195 1196 # This will be improved in the future.
1196 1197 if util.safehasattr(self.data, 'next') or util.safehasattr(
1197 1198 self.data, b'__next__'
1198 1199 ):
1199 1200 buff = util.chunkbuffer(self.data)
1200 1201 chunk = buff.read(preferedchunksize)
1201 1202 while chunk:
1202 1203 yield chunk
1203 1204 chunk = buff.read(preferedchunksize)
1204 1205 elif len(self.data):
1205 1206 yield self.data
1206 1207
1207 1208
1208 1209 flaginterrupt = -1
1209 1210
1210 1211
1211 1212 class interrupthandler(unpackermixin):
1212 1213 """read one part and process it with restricted capability
1213 1214
1214 1215 This allows transmitting an exception raised on the producer side during
1215 1216 part iteration while the consumer is reading a part.
1216 1217
1217 1218 Parts processed in this manner only have access to a ui object."""
1218 1219
1219 1220 def __init__(self, ui, fp):
1220 1221 super(interrupthandler, self).__init__(fp)
1221 1222 self.ui = ui
1222 1223
1223 1224 def _readpartheader(self):
1224 1225 """reads a part header size and return the bytes blob
1225 1226
1226 1227 returns None if empty"""
1227 1228 headersize = self._unpack(_fpartheadersize)[0]
1228 1229 if headersize < 0:
1229 1230 raise error.BundleValueError(
1230 1231 b'negative part header size: %i' % headersize
1231 1232 )
1232 1233 indebug(self.ui, b'part header size: %i\n' % headersize)
1233 1234 if headersize:
1234 1235 return self._readexact(headersize)
1235 1236 return None
1236 1237
1237 1238 def __call__(self):
1238 1239
1239 1240 self.ui.debug(
1240 1241 b'bundle2-input-stream-interrupt: opening out of band context\n'
1241 1242 )
1242 1243 indebug(self.ui, b'bundle2 stream interruption, looking for a part.')
1243 1244 headerblock = self._readpartheader()
1244 1245 if headerblock is None:
1245 1246 indebug(self.ui, b'no part found during interruption.')
1246 1247 return
1247 1248 part = unbundlepart(self.ui, headerblock, self._fp)
1248 1249 op = interruptoperation(self.ui)
1249 1250 hardabort = False
1250 1251 try:
1251 1252 _processpart(op, part)
1252 1253 except (SystemExit, KeyboardInterrupt):
1253 1254 hardabort = True
1254 1255 raise
1255 1256 finally:
1256 1257 if not hardabort:
1257 1258 part.consume()
1258 1259 self.ui.debug(
1259 1260 b'bundle2-input-stream-interrupt: closing out of band context\n'
1260 1261 )
1261 1262
1262 1263
1263 1264 class interruptoperation(object):
1264 1265 """A limited operation to be use by part handler during interruption
1265 1266
1266 1267 It only have access to an ui object.
1267 1268 """
1268 1269
1269 1270 def __init__(self, ui):
1270 1271 self.ui = ui
1271 1272 self.reply = None
1272 1273 self.captureoutput = False
1273 1274
1274 1275 @property
1275 1276 def repo(self):
1276 1277 raise error.ProgrammingError(b'no repo access from stream interruption')
1277 1278
1278 1279 def gettransaction(self):
1279 1280 raise TransactionUnavailable(b'no repo access from stream interruption')
1280 1281
1281 1282
1282 1283 def decodepayloadchunks(ui, fh):
1283 1284 """Reads bundle2 part payload data into chunks.
1284 1285
1285 1286 Part payload data consists of framed chunks. This function takes
1286 1287 a file handle and emits those chunks.
1287 1288 """
1288 1289 dolog = ui.configbool(b'devel', b'bundle2.debug')
1289 1290 debug = ui.debug
1290 1291
1291 1292 headerstruct = struct.Struct(_fpayloadsize)
1292 1293 headersize = headerstruct.size
1293 1294 unpack = headerstruct.unpack
1294 1295
1295 1296 readexactly = changegroup.readexactly
1296 1297 read = fh.read
1297 1298
1298 1299 chunksize = unpack(readexactly(fh, headersize))[0]
1299 1300 indebug(ui, b'payload chunk size: %i' % chunksize)
1300 1301
1301 1302 # changegroup.readexactly() is inlined below for performance.
1302 1303 while chunksize:
1303 1304 if chunksize >= 0:
1304 1305 s = read(chunksize)
1305 1306 if len(s) < chunksize:
1306 1307 raise error.Abort(
1307 1308 _(
1308 1309 b'stream ended unexpectedly '
1309 1310 b' (got %d bytes, expected %d)'
1310 1311 )
1311 1312 % (len(s), chunksize)
1312 1313 )
1313 1314
1314 1315 yield s
1315 1316 elif chunksize == flaginterrupt:
1316 1317 # Interrupt "signal" detected. The regular stream is interrupted
1317 1318 # and a bundle2 part follows. Consume it.
1318 1319 interrupthandler(ui, fh)()
1319 1320 else:
1320 1321 raise error.BundleValueError(
1321 1322 b'negative payload chunk size: %s' % chunksize
1322 1323 )
1323 1324
1324 1325 s = read(headersize)
1325 1326 if len(s) < headersize:
1326 1327 raise error.Abort(
1327 1328 _(b'stream ended unexpectedly (got %d bytes, expected %d)')
1328 1329 % (len(s), headersize)
1329 1330 )
1330 1331
1331 1332 chunksize = unpack(s)[0]
1332 1333
1333 1334 # indebug() inlined for performance.
1334 1335 if dolog:
1335 1336 debug(b'bundle2-input: payload chunk size: %i\n' % chunksize)
1336 1337
1337 1338
1338 1339 class unbundlepart(unpackermixin):
1339 1340 """a bundle part read from a bundle"""
1340 1341
1341 1342 def __init__(self, ui, header, fp):
1342 1343 super(unbundlepart, self).__init__(fp)
1343 1344 self._seekable = util.safehasattr(fp, 'seek') and util.safehasattr(
1344 1345 fp, b'tell'
1345 1346 )
1346 1347 self.ui = ui
1347 1348 # unbundle state attr
1348 1349 self._headerdata = header
1349 1350 self._headeroffset = 0
1350 1351 self._initialized = False
1351 1352 self.consumed = False
1352 1353 # part data
1353 1354 self.id = None
1354 1355 self.type = None
1355 1356 self.mandatoryparams = None
1356 1357 self.advisoryparams = None
1357 1358 self.params = None
1358 1359 self.mandatorykeys = ()
1359 1360 self._readheader()
1360 1361 self._mandatory = None
1361 1362 self._pos = 0
1362 1363
1363 1364 def _fromheader(self, size):
1364 1365 """return the next <size> byte from the header"""
1365 1366 offset = self._headeroffset
1366 1367 data = self._headerdata[offset : (offset + size)]
1367 1368 self._headeroffset = offset + size
1368 1369 return data
1369 1370
1370 1371 def _unpackheader(self, format):
1371 1372 """read given format from header
1372 1373
1373 1374 This automatically computes the size of the format to read."""
1374 1375 data = self._fromheader(struct.calcsize(format))
1375 1376 return _unpack(format, data)
1376 1377
1377 1378 def _initparams(self, mandatoryparams, advisoryparams):
1378 1379 """internal function to setup all logic related parameters"""
1379 1380 # make it read only to prevent people touching it by mistake.
1380 1381 self.mandatoryparams = tuple(mandatoryparams)
1381 1382 self.advisoryparams = tuple(advisoryparams)
1382 1383 # user friendly UI
1383 1384 self.params = util.sortdict(self.mandatoryparams)
1384 1385 self.params.update(self.advisoryparams)
1385 1386 self.mandatorykeys = frozenset(p[0] for p in mandatoryparams)
1386 1387
1387 1388 def _readheader(self):
1388 1389 """read the header and setup the object"""
1389 1390 typesize = self._unpackheader(_fparttypesize)[0]
1390 1391 self.type = self._fromheader(typesize)
1391 1392 indebug(self.ui, b'part type: "%s"' % self.type)
1392 1393 self.id = self._unpackheader(_fpartid)[0]
1393 1394 indebug(self.ui, b'part id: "%s"' % pycompat.bytestr(self.id))
1394 1395 # extract mandatory bit from type
1395 1396 self.mandatory = self.type != self.type.lower()
1396 1397 self.type = self.type.lower()
1397 1398 ## reading parameters
1398 1399 # param count
1399 1400 mancount, advcount = self._unpackheader(_fpartparamcount)
1400 1401 indebug(self.ui, b'part parameters: %i' % (mancount + advcount))
1401 1402 # param size
1402 1403 fparamsizes = _makefpartparamsizes(mancount + advcount)
1403 1404 paramsizes = self._unpackheader(fparamsizes)
1404 1405 # make it a list of pairs again
1405 1406 paramsizes = list(zip(paramsizes[::2], paramsizes[1::2]))
1406 1407 # split mandatory from advisory
1407 1408 mansizes = paramsizes[:mancount]
1408 1409 advsizes = paramsizes[mancount:]
1409 1410 # retrieve param value
1410 1411 manparams = []
1411 1412 for key, value in mansizes:
1412 1413 manparams.append((self._fromheader(key), self._fromheader(value)))
1413 1414 advparams = []
1414 1415 for key, value in advsizes:
1415 1416 advparams.append((self._fromheader(key), self._fromheader(value)))
1416 1417 self._initparams(manparams, advparams)
1417 1418 ## part payload
1418 1419 self._payloadstream = util.chunkbuffer(self._payloadchunks())
1419 1420 # we read the data, tell it
1420 1421 self._initialized = True
1421 1422
1422 1423 def _payloadchunks(self):
1423 1424 """Generator of decoded chunks in the payload."""
1424 1425 return decodepayloadchunks(self.ui, self._fp)
1425 1426
1426 1427 def consume(self):
1427 1428 """Read the part payload until completion.
1428 1429
1429 1430 By consuming the part data, the underlying stream read offset will
1430 1431 be advanced to the next part (or end of stream).
1431 1432 """
1432 1433 if self.consumed:
1433 1434 return
1434 1435
1435 1436 chunk = self.read(32768)
1436 1437 while chunk:
1437 1438 self._pos += len(chunk)
1438 1439 chunk = self.read(32768)
1439 1440
1440 1441 def read(self, size=None):
1441 1442 """read payload data"""
1442 1443 if not self._initialized:
1443 1444 self._readheader()
1444 1445 if size is None:
1445 1446 data = self._payloadstream.read()
1446 1447 else:
1447 1448 data = self._payloadstream.read(size)
1448 1449 self._pos += len(data)
1449 1450 if size is None or len(data) < size:
1450 1451 if not self.consumed and self._pos:
1451 1452 self.ui.debug(
1452 1453 b'bundle2-input-part: total payload size %i\n' % self._pos
1453 1454 )
1454 1455 self.consumed = True
1455 1456 return data
1456 1457
1457 1458
1458 1459 class seekableunbundlepart(unbundlepart):
1459 1460 """A bundle2 part in a bundle that is seekable.
1460 1461
1461 1462 Regular ``unbundlepart`` instances can only be read once. This class
1462 1463 extends ``unbundlepart`` to enable bi-directional seeking within the
1463 1464 part.
1464 1465
1465 1466 Bundle2 part data consists of framed chunks. Offsets when seeking
1466 1467 refer to the decoded data, not the offsets in the underlying bundle2
1467 1468 stream.
1468 1469
1469 1470 To facilitate quickly seeking within the decoded data, instances of this
1470 1471 class maintain a mapping between offsets in the underlying stream and
1471 1472 the decoded payload. This mapping will consume memory in proportion
1472 1473 to the number of chunks within the payload (which almost certainly
1473 1474 increases in proportion with the size of the part).
1474 1475 """
1475 1476
1476 1477 def __init__(self, ui, header, fp):
1477 1478 # (payload, file) offsets for chunk starts.
1478 1479 self._chunkindex = []
1479 1480
1480 1481 super(seekableunbundlepart, self).__init__(ui, header, fp)
1481 1482
1482 1483 def _payloadchunks(self, chunknum=0):
1483 1484 '''seek to specified chunk and start yielding data'''
1484 1485 if len(self._chunkindex) == 0:
1485 1486 assert chunknum == 0, b'Must start with chunk 0'
1486 1487 self._chunkindex.append((0, self._tellfp()))
1487 1488 else:
1488 1489 assert chunknum < len(self._chunkindex), (
1489 1490 b'Unknown chunk %d' % chunknum
1490 1491 )
1491 1492 self._seekfp(self._chunkindex[chunknum][1])
1492 1493
1493 1494 pos = self._chunkindex[chunknum][0]
1494 1495
1495 1496 for chunk in decodepayloadchunks(self.ui, self._fp):
1496 1497 chunknum += 1
1497 1498 pos += len(chunk)
1498 1499 if chunknum == len(self._chunkindex):
1499 1500 self._chunkindex.append((pos, self._tellfp()))
1500 1501
1501 1502 yield chunk
1502 1503
1503 1504 def _findchunk(self, pos):
1504 1505 '''for a given payload position, return a chunk number and offset'''
1505 1506 for chunk, (ppos, fpos) in enumerate(self._chunkindex):
1506 1507 if ppos == pos:
1507 1508 return chunk, 0
1508 1509 elif ppos > pos:
1509 1510 return chunk - 1, pos - self._chunkindex[chunk - 1][0]
1510 1511 raise ValueError(b'Unknown chunk')
1511 1512
1512 1513 def tell(self):
1513 1514 return self._pos
1514 1515
1515 1516 def seek(self, offset, whence=os.SEEK_SET):
1516 1517 if whence == os.SEEK_SET:
1517 1518 newpos = offset
1518 1519 elif whence == os.SEEK_CUR:
1519 1520 newpos = self._pos + offset
1520 1521 elif whence == os.SEEK_END:
1521 1522 if not self.consumed:
1522 1523 # Can't use self.consume() here because it advances self._pos.
1523 1524 chunk = self.read(32768)
1524 1525 while chunk:
1525 1526 chunk = self.read(32768)
1526 1527 newpos = self._chunkindex[-1][0] - offset
1527 1528 else:
1528 1529 raise ValueError(b'Unknown whence value: %r' % (whence,))
1529 1530
1530 1531 if newpos > self._chunkindex[-1][0] and not self.consumed:
1531 1532 # Can't use self.consume() here because it advances self._pos.
1532 1533 chunk = self.read(32768)
1533 1534 while chunk:
1534 1535 chunk = self.read(32768)
1535 1536
1536 1537 if not 0 <= newpos <= self._chunkindex[-1][0]:
1537 1538 raise ValueError(b'Offset out of range')
1538 1539
1539 1540 if self._pos != newpos:
1540 1541 chunk, internaloffset = self._findchunk(newpos)
1541 1542 self._payloadstream = util.chunkbuffer(self._payloadchunks(chunk))
1542 1543 adjust = self.read(internaloffset)
1543 1544 if len(adjust) != internaloffset:
1544 1545 raise error.Abort(_(b'Seek failed\n'))
1545 1546 self._pos = newpos
1546 1547
1547 1548 def _seekfp(self, offset, whence=0):
1548 1549 """move the underlying file pointer
1549 1550
1550 1551 This method is meant for internal usage by the bundle2 protocol only.
1551 1552 It directly manipulates the low level stream, including bundle2 level
1552 1553 instructions.
1553 1554
1554 1555 Do not use it to implement higher-level logic or methods."""
1555 1556 if self._seekable:
1556 1557 return self._fp.seek(offset, whence)
1557 1558 else:
1558 1559 raise NotImplementedError(_(b'File pointer is not seekable'))
1559 1560
1560 1561 def _tellfp(self):
1561 1562 """return the file offset, or None if file is not seekable
1562 1563
1563 1564 This method is meant for internal usage by the bundle2 protocol only.
1564 1565 It directly manipulates the low level stream, including bundle2 level
1565 1566 instructions.
1566 1567
1567 1568 Do not use it to implement higher-level logic or methods."""
1568 1569 if self._seekable:
1569 1570 try:
1570 1571 return self._fp.tell()
1571 1572 except IOError as e:
1572 1573 if e.errno == errno.ESPIPE:
1573 1574 self._seekable = False
1574 1575 else:
1575 1576 raise
1576 1577 return None
1577 1578
1578 1579
1579 1580 # These are only the static capabilities.
1580 1581 # Check the 'getrepocaps' function for the rest.
1581 1582 capabilities = {
1582 1583 b'HG20': (),
1583 1584 b'bookmarks': (),
1584 1585 b'error': (b'abort', b'unsupportedcontent', b'pushraced', b'pushkey'),
1585 1586 b'listkeys': (),
1586 1587 b'pushkey': (),
1587 1588 b'digests': tuple(sorted(util.DIGESTS.keys())),
1588 1589 b'remote-changegroup': (b'http', b'https'),
1589 1590 b'hgtagsfnodes': (),
1590 1591 b'rev-branch-cache': (),
1591 1592 b'phases': (b'heads',),
1592 1593 b'stream': (b'v2',),
1593 1594 }
1594 1595
1595 1596
1596 1597 def getrepocaps(repo, allowpushback=False, role=None):
1597 1598 """return the bundle2 capabilities for a given repo
1598 1599
1599 1600 Exists to allow extensions (like evolution) to mutate the capabilities.
1600 1601
1601 1602 The returned value is used for servers advertising their capabilities as
1602 1603 well as clients advertising their capabilities to servers as part of
1603 1604 bundle2 requests. The ``role`` argument specifies which is which.
1604 1605 """
1605 1606 if role not in (b'client', b'server'):
1606 1607 raise error.ProgrammingError(b'role argument must be client or server')
1607 1608
1608 1609 caps = capabilities.copy()
1609 1610 caps[b'changegroup'] = tuple(
1610 1611 sorted(changegroup.supportedincomingversions(repo))
1611 1612 )
1612 1613 if obsolete.isenabled(repo, obsolete.exchangeopt):
1613 1614 supportedformat = tuple(b'V%i' % v for v in obsolete.formats)
1614 1615 caps[b'obsmarkers'] = supportedformat
1615 1616 if allowpushback:
1616 1617 caps[b'pushback'] = ()
1617 1618 cpmode = repo.ui.config(b'server', b'concurrent-push-mode')
1618 1619 if cpmode == b'check-related':
1619 1620 caps[b'checkheads'] = (b'related',)
1620 1621 if b'phases' in repo.ui.configlist(b'devel', b'legacy.exchange'):
1621 1622 caps.pop(b'phases')
1622 1623
1623 1624 # Don't advertise stream clone support in server mode if not configured.
1624 1625 if role == b'server':
1625 1626 streamsupported = repo.ui.configbool(
1626 1627 b'server', b'uncompressed', untrusted=True
1627 1628 )
1628 1629 featuresupported = repo.ui.configbool(b'server', b'bundle2.stream')
1629 1630
1630 1631 if not streamsupported or not featuresupported:
1631 1632 caps.pop(b'stream')
1632 1633 # Else always advertise support on client, because payload support
1633 1634 # should always be advertised.
1634 1635
1635 1636 return caps
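
# A usage sketch for the function above ('repo' is assumed to be an
# existing localrepository). The returned dict round-trips through the
# urlquoted blob format used on the wire via encodecaps()/decodecaps():
#
#     caps = getrepocaps(repo, role=b'server')
#     blob = encodecaps(caps)
#     assert decodecaps(blob) == caps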
1636 1637
1637 1638
1638 1639 def bundle2caps(remote):
1639 1640 """return the bundle capabilities of a peer as dict"""
1640 1641 raw = remote.capable(b'bundle2')
1641 1642 if not raw and raw != b'':
1642 1643 return {}
1643 1644 capsblob = urlreq.unquote(remote.capable(b'bundle2'))
1644 1645 return decodecaps(capsblob)
1645 1646
1646 1647
1647 1648 def obsmarkersversion(caps):
1648 1649 """extract the list of supported obsmarkers versions from a bundle2caps dict
1649 1650 """
1650 1651 obscaps = caps.get(b'obsmarkers', ())
1651 1652 return [int(c[1:]) for c in obscaps if c.startswith(b'V')]
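
# For example, a caps dict advertising obsmarker formats V0 and V1:
#
#     obsmarkersversion({b'obsmarkers': (b'V0', b'V1')}) == [0, 1]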
1652 1653
1653 1654
1654 1655 def writenewbundle(
1655 1656 ui,
1656 1657 repo,
1657 1658 source,
1658 1659 filename,
1659 1660 bundletype,
1660 1661 outgoing,
1661 1662 opts,
1662 1663 vfs=None,
1663 1664 compression=None,
1664 1665 compopts=None,
1665 1666 ):
1666 1667 if bundletype.startswith(b'HG10'):
1667 1668 cg = changegroup.makechangegroup(repo, outgoing, b'01', source)
1668 1669 return writebundle(
1669 1670 ui,
1670 1671 cg,
1671 1672 filename,
1672 1673 bundletype,
1673 1674 vfs=vfs,
1674 1675 compression=compression,
1675 1676 compopts=compopts,
1676 1677 )
1677 1678 elif not bundletype.startswith(b'HG20'):
1678 1679 raise error.ProgrammingError(b'unknown bundle type: %s' % bundletype)
1679 1680
1680 1681 caps = {}
1681 1682 if b'obsolescence' in opts:
1682 1683 caps[b'obsmarkers'] = (b'V1',)
1683 1684 bundle = bundle20(ui, caps)
1684 1685 bundle.setcompression(compression, compopts)
1685 1686 _addpartsfromopts(ui, repo, bundle, source, outgoing, opts)
1686 1687 chunkiter = bundle.getchunks()
1687 1688
1688 1689 return changegroup.writechunks(ui, chunkiter, filename, vfs=vfs)
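
# A hypothetical call sketch for the function above ('repo' and
# 'outgoing' are assumed to exist): write an uncompressed HG20 bundle
# carrying only a changegroup part.
#
#     opts = {b'changegroup': True, b'phases': False, b'obsolescence': False}
#     writenewbundle(repo.ui, repo, b'bundle', b'out.hg', b'HG20',
#                    outgoing, opts)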
1689 1690
1690 1691
1691 1692 def _addpartsfromopts(ui, repo, bundler, source, outgoing, opts):
1692 1693 # We should eventually reconcile this logic with the one behind
1693 1694 # 'exchange.getbundle2partsgenerator'.
1694 1695 #
1695 1696 # The types of input from 'getbundle' and 'writenewbundle' are a bit
1696 1697 # different right now. So we keep them separated for now for the sake of
1697 1698 # simplicity.
1698 1699
1699 1700 # we might not always want a changegroup in such a bundle, for example in
1700 1701 # stream bundles
1701 1702 if opts.get(b'changegroup', True):
1702 1703 cgversion = opts.get(b'cg.version')
1703 1704 if cgversion is None:
1704 1705 cgversion = changegroup.safeversion(repo)
1705 1706 cg = changegroup.makechangegroup(repo, outgoing, cgversion, source)
1706 1707 part = bundler.newpart(b'changegroup', data=cg.getchunks())
1707 1708 part.addparam(b'version', cg.version)
1708 1709 if b'clcount' in cg.extras:
1709 1710 part.addparam(
1710 1711 b'nbchanges', b'%d' % cg.extras[b'clcount'], mandatory=False
1711 1712 )
1712 1713 if opts.get(b'phases') and repo.revs(
1713 1714 b'%ln and secret()', outgoing.missingheads
1714 1715 ):
1715 1716 part.addparam(
1716 1717 b'targetphase', b'%d' % phases.secret, mandatory=False
1717 1718 )
1718 1719 if b'exp-sidedata-flag' in repo.requirements:
1719 1720 part.addparam(b'exp-sidedata', b'1')
1720 1721
1721 1722 if opts.get(b'streamv2', False):
1722 1723 addpartbundlestream2(bundler, repo, stream=True)
1723 1724
1724 1725 if opts.get(b'tagsfnodescache', True):
1725 1726 addparttagsfnodescache(repo, bundler, outgoing)
1726 1727
1727 1728 if opts.get(b'revbranchcache', True):
1728 1729 addpartrevbranchcache(repo, bundler, outgoing)
1729 1730
1730 1731 if opts.get(b'obsolescence', False):
1731 1732 obsmarkers = repo.obsstore.relevantmarkers(outgoing.missing)
1732 1733 buildobsmarkerspart(bundler, obsmarkers)
1733 1734
1734 1735 if opts.get(b'phases', False):
1735 1736 headsbyphase = phases.subsetphaseheads(repo, outgoing.missing)
1736 1737 phasedata = phases.binaryencode(headsbyphase)
1737 1738 bundler.newpart(b'phase-heads', data=phasedata)
1738 1739
1739 1740
1740 1741 def addparttagsfnodescache(repo, bundler, outgoing):
1741 1742 # we include the tags fnode cache for the bundle changeset
1742 1743 # (as an optional part)
1743 1744 cache = tags.hgtagsfnodescache(repo.unfiltered())
1744 1745 chunks = []
1745 1746
1746 1747 # .hgtags fnodes are only relevant for head changesets. While we could
1747 1748 # transfer values for all known nodes, there will likely be little to
1748 1749 # no benefit.
1749 1750 #
1750 1751 # We don't bother using a generator to produce output data because
1751 1752 # a) we only have 40 bytes per head and even esoteric numbers of heads
1752 1753 # consume little memory (1M heads is 40MB) b) we don't want to send the
1753 1754 # part if we don't have entries and knowing if we have entries requires
1754 1755 # cache lookups.
1755 1756 for node in outgoing.missingheads:
1756 1757 # Don't compute missing, as this may slow down serving.
1757 1758 fnode = cache.getfnode(node, computemissing=False)
1758 1759 if fnode is not None:
1759 1760 chunks.extend([node, fnode])
1760 1761
1761 1762 if chunks:
1762 1763 bundler.newpart(b'hgtagsfnodes', data=b''.join(chunks))
1763 1764
1764 1765
1765 1766 def addpartrevbranchcache(repo, bundler, outgoing):
1766 1767 # we include the rev branch cache for the bundle changeset
1767 1768 # (as an optional part)
1768 1769 cache = repo.revbranchcache()
1769 1770 cl = repo.unfiltered().changelog
1770 1771 branchesdata = collections.defaultdict(lambda: (set(), set()))
1771 1772 for node in outgoing.missing:
1772 1773 branch, close = cache.branchinfo(cl.rev(node))
1773 1774 branchesdata[branch][close].add(node)
1774 1775
1775 1776 def generate():
1776 1777 for branch, (nodes, closed) in sorted(branchesdata.items()):
1777 1778 utf8branch = encoding.fromlocal(branch)
1778 1779 yield rbcstruct.pack(len(utf8branch), len(nodes), len(closed))
1779 1780 yield utf8branch
1780 1781 for n in sorted(nodes):
1781 1782 yield n
1782 1783 for n in sorted(closed):
1783 1784 yield n
1784 1785
1785 1786 bundler.newpart(b'cache:rev-branch-cache', data=generate(), mandatory=False)
1786 1787
1787 1788
1788 1789 def _formatrequirementsspec(requirements):
1789 1790 requirements = [req for req in requirements if req != b"shared"]
1790 1791 return urlreq.quote(b','.join(sorted(requirements)))
1791 1792
1792 1793
1793 1794 def _formatrequirementsparams(requirements):
1794 1795 requirements = _formatrequirementsspec(requirements)
1795 1796 params = b"%s%s" % (urlreq.quote(b"requirements="), requirements)
1796 1797 return params
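
# For example, with illustrative requirement names: b'shared' is dropped,
# the rest are sorted, comma-joined, and urlquoted, and the quoted
# 'requirements=' prefix is prepended.
#
#     _formatrequirementsparams({b'store', b'revlogv1', b'shared'})
#     # -> b'requirements%3Drevlogv1%2Cstore'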
1797 1798
1798 1799
1799 1800 def addpartbundlestream2(bundler, repo, **kwargs):
1800 1801 if not kwargs.get('stream', False):
1801 1802 return
1802 1803
1803 1804 if not streamclone.allowservergeneration(repo):
1804 1805 raise error.Abort(
1805 1806 _(
1806 1807 b'stream data requested but server does not allow '
1807 1808 b'this feature'
1808 1809 ),
1809 1810 hint=_(
1810 1811 b'well-behaved clients should not be '
1811 1812 b'requesting stream data from servers not '
1812 1813 b'advertising it; the client may be buggy'
1813 1814 ),
1814 1815 )
1815 1816
1816 1817 # Stream clones don't compress well. And compression undermines a
1817 1818 # goal of stream clones, which is to be fast. Communicate the desire
1818 1819 # to avoid compression to consumers of the bundle.
1819 1820 bundler.prefercompressed = False
1820 1821
1821 1822 # get the includes and excludes
1822 1823 includepats = kwargs.get('includepats')
1823 1824 excludepats = kwargs.get('excludepats')
1824 1825
1825 1826 narrowstream = repo.ui.configbool(
1826 1827 b'experimental', b'server.stream-narrow-clones'
1827 1828 )
1828 1829
1829 1830 if (includepats or excludepats) and not narrowstream:
1830 1831 raise error.Abort(_(b'server does not support narrow stream clones'))
1831 1832
1832 1833 includeobsmarkers = False
1833 1834 if repo.obsstore:
1834 1835 remoteversions = obsmarkersversion(bundler.capabilities)
1835 1836 if not remoteversions:
1836 1837 raise error.Abort(
1837 1838 _(
1838 1839 b'server has obsolescence markers, but client '
1839 1840 b'cannot receive them via stream clone'
1840 1841 )
1841 1842 )
1842 1843 elif repo.obsstore._version in remoteversions:
1843 1844 includeobsmarkers = True
1844 1845
1845 1846 filecount, bytecount, it = streamclone.generatev2(
1846 1847 repo, includepats, excludepats, includeobsmarkers
1847 1848 )
1848 1849 requirements = _formatrequirementsspec(repo.requirements)
1849 1850 part = bundler.newpart(b'stream2', data=it)
1850 1851 part.addparam(b'bytecount', b'%d' % bytecount, mandatory=True)
1851 1852 part.addparam(b'filecount', b'%d' % filecount, mandatory=True)
1852 1853 part.addparam(b'requirements', requirements, mandatory=True)
1853 1854
1854 1855
1855 1856 def buildobsmarkerspart(bundler, markers):
1856 1857 """add an obsmarker part to the bundler with <markers>
1857 1858
1858 1859 No part is created if markers is empty.
1859 1860 Raises ValueError if the bundler doesn't support any known obsmarker format.
1860 1861 """
1861 1862 if not markers:
1862 1863 return None
1863 1864
1864 1865 remoteversions = obsmarkersversion(bundler.capabilities)
1865 1866 version = obsolete.commonversion(remoteversions)
1866 1867 if version is None:
1867 1868 raise ValueError(b'bundler does not support common obsmarker format')
1868 1869 stream = obsolete.encodemarkers(markers, True, version=version)
1869 1870 return bundler.newpart(b'obsmarkers', data=stream)
1870 1871
1871 1872
1872 1873 def writebundle(
1873 1874 ui, cg, filename, bundletype, vfs=None, compression=None, compopts=None
1874 1875 ):
1875 1876 """Write a bundle file and return its filename.
1876 1877
1877 1878 Existing files will not be overwritten.
1878 1879 If no filename is specified, a temporary file is created.
1879 1880 bz2 compression can be turned off.
1880 1881 The bundle file will be deleted in case of errors.
1881 1882 """
1882 1883
1883 1884 if bundletype == b"HG20":
1884 1885 bundle = bundle20(ui)
1885 1886 bundle.setcompression(compression, compopts)
1886 1887 part = bundle.newpart(b'changegroup', data=cg.getchunks())
1887 1888 part.addparam(b'version', cg.version)
1888 1889 if b'clcount' in cg.extras:
1889 1890 part.addparam(
1890 1891 b'nbchanges', b'%d' % cg.extras[b'clcount'], mandatory=False
1891 1892 )
1892 1893 chunkiter = bundle.getchunks()
1893 1894 else:
1894 1895 # compression argument is only for the bundle2 case
1895 1896 assert compression is None
1896 1897 if cg.version != b'01':
1897 1898 raise error.Abort(
1898 1899 _(b'old bundle types only support v1 changegroups')
1899 1900 )
1900 1901 header, comp = bundletypes[bundletype]
1901 1902 if comp not in util.compengines.supportedbundletypes:
1902 1903 raise error.Abort(_(b'unknown stream compression type: %s') % comp)
1903 1904 compengine = util.compengines.forbundletype(comp)
1904 1905
1905 1906 def chunkiter():
1906 1907 yield header
1907 1908 for chunk in compengine.compressstream(cg.getchunks(), compopts):
1908 1909 yield chunk
1909 1910
1910 1911 chunkiter = chunkiter()
1911 1912
1912 1913 # parse the changegroup data, otherwise we will block
1913 1914 # in case of sshrepo because we don't know the end of the stream
1914 1915 return changegroup.writechunks(ui, chunkiter, filename, vfs=vfs)
1915 1916
1916 1917
1917 1918 def combinechangegroupresults(op):
1918 1919 """logic to combine 0 or more addchangegroup results into one"""
1919 1920 results = [r.get(b'return', 0) for r in op.records[b'changegroup']]
1920 1921 changedheads = 0
1921 1922 result = 1
1922 1923 for ret in results:
1923 1924 # If any changegroup result is 0, return 0
1924 1925 if ret == 0:
1925 1926 result = 0
1926 1927 break
1927 1928 if ret < -1:
1928 1929 changedheads += ret + 1
1929 1930 elif ret > 1:
1930 1931 changedheads += ret - 1
1931 1932 if changedheads > 0:
1932 1933 result = 1 + changedheads
1933 1934 elif changedheads < 0:
1934 1935 result = -1 + changedheads
1935 1936 return result
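
# The addchangegroup return convention combined above: 0 means an error,
# 1 means success with unchanged heads, (1 + n) means n heads were added,
# and (-1 - n) means n heads were removed. For example, combining the
# results [2, 3] (one head added, then two more):
#
#     changedheads = (2 - 1) + (3 - 1) == 3  ->  result == 1 + 3 == 4
#
# while any 0 in the results short-circuits the combination to 0.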
1936 1937
1937 1938
1938 1939 @parthandler(
1939 1940 b'changegroup',
1940 1941 (
1941 1942 b'version',
1942 1943 b'nbchanges',
1943 1944 b'exp-sidedata',
1944 1945 b'treemanifest',
1945 1946 b'targetphase',
1946 1947 ),
1947 1948 )
1948 1949 def handlechangegroup(op, inpart):
1949 1950 """apply a changegroup part on the repo
1950 1951
1951 1952 This is a very early implementation that will see massive rework before
1952 1953 being inflicted on any end-user.
1953 1954 """
1954 1955 from . import localrepo
1955 1956
1956 1957 tr = op.gettransaction()
1957 1958 unpackerversion = inpart.params.get(b'version', b'01')
1958 1959 # We should raise an appropriate exception here
1959 1960 cg = changegroup.getunbundler(unpackerversion, inpart, None)
1960 1961 # the source and url passed here are overwritten by the one contained in
1961 1962 # the transaction.hookargs argument. So 'bundle2' is a placeholder
1962 1963 nbchangesets = None
1963 1964 if b'nbchanges' in inpart.params:
1964 1965 nbchangesets = int(inpart.params.get(b'nbchanges'))
1965 1966 if (
1966 1967 b'treemanifest' in inpart.params
1967 1968 and b'treemanifest' not in op.repo.requirements
1968 1969 ):
1969 1970 if len(op.repo.changelog) != 0:
1970 1971 raise error.Abort(
1971 1972 _(
1972 1973 b"bundle contains tree manifests, but local repo is "
1973 1974 b"non-empty and does not use tree manifests"
1974 1975 )
1975 1976 )
1976 1977 op.repo.requirements.add(b'treemanifest')
1977 1978 op.repo.svfs.options = localrepo.resolvestorevfsoptions(
1978 1979 op.repo.ui, op.repo.requirements, op.repo.features
1979 1980 )
1980 1981 op.repo._writerequirements()
1981 1982
1982 1983 bundlesidedata = bool(b'exp-sidedata' in inpart.params)
1983 1984 reposidedata = bool(b'exp-sidedata-flag' in op.repo.requirements)
1984 1985 if reposidedata and not bundlesidedata:
1985 1986 msg = b"repository is using sidedata but the bundle source does not"
1986 1987 hint = b'this is currently unsupported'
1987 1988 raise error.Abort(msg, hint=hint)
1988 1989
1989 1990 extrakwargs = {}
1990 1991 targetphase = inpart.params.get(b'targetphase')
1991 1992 if targetphase is not None:
1992 1993 extrakwargs['targetphase'] = int(targetphase)
1993 1994 ret = _processchangegroup(
1994 1995 op,
1995 1996 cg,
1996 1997 tr,
1997 1998 b'bundle2',
1998 1999 b'bundle2',
1999 2000 expectedtotal=nbchangesets,
2000 2001 **extrakwargs
2001 2002 )
2002 2003 if op.reply is not None:
2003 2004 # This is definitely not the final form of this
2004 2005 # return. But one needs to start somewhere.
2005 2006 part = op.reply.newpart(b'reply:changegroup', mandatory=False)
2006 2007 part.addparam(
2007 2008 b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
2008 2009 )
2009 2010 part.addparam(b'return', b'%i' % ret, mandatory=False)
2010 2011 assert not inpart.read()
2011 2012
2012 2013
2013 2014 _remotechangegroupparams = tuple(
2014 2015 [b'url', b'size', b'digests']
2015 2016 + [b'digest:%s' % k for k in util.DIGESTS.keys()]
2016 2017 )
2017 2018
2018 2019
2019 2020 @parthandler(b'remote-changegroup', _remotechangegroupparams)
2020 2021 def handleremotechangegroup(op, inpart):
2021 2022 """apply a bundle10 on the repo, given an url and validation information
2022 2023
2023 2024 All the information about the remote bundle to import is given as
2024 2025 parameters. The parameters include:
2025 2026 - url: the url to the bundle10.
2026 2027 - size: the bundle10 file size. It is used to validate what was
2027 2028 retrieved by the client matches the server knowledge about the bundle.
2028 2029 - digests: a space separated list of the digest types provided as
2029 2030 parameters.
2030 2031 - digest:<digest-type>: the hexadecimal representation of the digest with
2031 2032 that name. Like the size, it is used to validate what was retrieved by
2032 2033 the client matches what the server knows about the bundle.
2033 2034
2034 2035 When multiple digest types are given, all of them are checked.
2035 2036 """
2036 2037 try:
2037 2038 raw_url = inpart.params[b'url']
2038 2039 except KeyError:
2039 2040 raise error.Abort(_(b'remote-changegroup: missing "%s" param') % b'url')
2040 2041 parsed_url = util.url(raw_url)
2041 2042 if parsed_url.scheme not in capabilities[b'remote-changegroup']:
2042 2043 raise error.Abort(
2043 2044 _(b'remote-changegroup does not support %s urls')
2044 2045 % parsed_url.scheme
2045 2046 )
2046 2047
2047 2048 try:
2048 2049 size = int(inpart.params[b'size'])
2049 2050 except ValueError:
2050 2051 raise error.Abort(
2051 2052 _(b'remote-changegroup: invalid value for param "%s"') % b'size'
2052 2053 )
2053 2054 except KeyError:
2054 2055 raise error.Abort(
2055 2056 _(b'remote-changegroup: missing "%s" param') % b'size'
2056 2057 )
2057 2058
2058 2059 digests = {}
2059 2060 for typ in inpart.params.get(b'digests', b'').split():
2060 2061 param = b'digest:%s' % typ
2061 2062 try:
2062 2063 value = inpart.params[param]
2063 2064 except KeyError:
2064 2065 raise error.Abort(
2065 2066 _(b'remote-changegroup: missing "%s" param') % param
2066 2067 )
2067 2068 digests[typ] = value
2068 2069
2069 2070 real_part = util.digestchecker(url.open(op.ui, raw_url), size, digests)
2070 2071
2071 2072 tr = op.gettransaction()
2072 2073 from . import exchange
2073 2074
2074 2075 cg = exchange.readbundle(op.repo.ui, real_part, raw_url)
2075 2076 if not isinstance(cg, changegroup.cg1unpacker):
2076 2077 raise error.Abort(
2077 2078 _(b'%s: not a bundle version 1.0') % util.hidepassword(raw_url)
2078 2079 )
2079 2080 ret = _processchangegroup(op, cg, tr, b'bundle2', b'bundle2')
2080 2081 if op.reply is not None:
2081 2082 # This is definitely not the final form of this
2082 2083 # return. But one needs to start somewhere.
2083 2084 part = op.reply.newpart(b'reply:changegroup')
2084 2085 part.addparam(
2085 2086 b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
2086 2087 )
2087 2088 part.addparam(b'return', b'%i' % ret, mandatory=False)
2088 2089 try:
2089 2090 real_part.validate()
2090 2091 except error.Abort as e:
2091 2092 raise error.Abort(
2092 2093 _(b'bundle at %s is corrupted:\n%s')
2093 2094 % (util.hidepassword(raw_url), bytes(e))
2094 2095 )
2095 2096 assert not inpart.read()
2096 2097
2097 2098
2098 2099 @parthandler(b'reply:changegroup', (b'return', b'in-reply-to'))
2099 2100 def handlereplychangegroup(op, inpart):
2100 2101 ret = int(inpart.params[b'return'])
2101 2102 replyto = int(inpart.params[b'in-reply-to'])
2102 2103 op.records.add(b'changegroup', {b'return': ret}, replyto)
2103 2104
2104 2105
2105 2106 @parthandler(b'check:bookmarks')
2106 2107 def handlecheckbookmarks(op, inpart):
2107 2108 """check location of bookmarks
2108 2109
2109 2110 This part is to be used to detect push races regarding bookmarks; it
2110 2111 contains binary encoded (bookmark, node) tuples. If the local state does
2111 2112 not match the one in the part, a PushRaced exception is raised.
2112 2113 """
2113 2114 bookdata = bookmarks.binarydecode(inpart)
2114 2115
2115 2116 msgstandard = (
2116 2117 b'remote repository changed while pushing - please try again '
2117 2118 b'(bookmark "%s" move from %s to %s)'
2118 2119 )
2119 2120 msgmissing = (
2120 2121 b'remote repository changed while pushing - please try again '
2121 2122 b'(bookmark "%s" is missing, expected %s)'
2122 2123 )
2123 2124 msgexist = (
2124 2125 b'remote repository changed while pushing - please try again '
2125 2126 b'(bookmark "%s" set on %s, expected missing)'
2126 2127 )
2127 2128 for book, node in bookdata:
2128 2129 currentnode = op.repo._bookmarks.get(book)
2129 2130 if currentnode != node:
2130 2131 if node is None:
2131 2132 finalmsg = msgexist % (book, nodemod.short(currentnode))
2132 2133 elif currentnode is None:
2133 2134 finalmsg = msgmissing % (book, nodemod.short(node))
2134 2135 else:
2135 2136 finalmsg = msgstandard % (
2136 2137 book,
2137 2138 nodemod.short(node),
2138 2139 nodemod.short(currentnode),
2139 2140 )
2140 2141 raise error.PushRaced(finalmsg)
2141 2142
2142 2143
2143 2144 @parthandler(b'check:heads')
2144 2145 def handlecheckheads(op, inpart):
2145 2146 """check that head of the repo did not change
2146 2147
2147 2148 This is used to detect a push race when using unbundle.
2148 2149 This replaces the "heads" argument of unbundle."""
2149 2150 h = inpart.read(20)
2150 2151 heads = []
2151 2152 while len(h) == 20:
2152 2153 heads.append(h)
2153 2154 h = inpart.read(20)
2154 2155 assert not h
2155 2156 # Trigger a transaction so that we are guaranteed to have the lock now.
2156 2157 if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
2157 2158 op.gettransaction()
2158 2159 if sorted(heads) != sorted(op.repo.heads()):
2159 2160 raise error.PushRaced(
2160 2161 b'remote repository changed while pushing - please try again'
2161 2162 )
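
# A sketch of the wire payload parsed above: a flat sequence of 20-byte
# binary nodes, with no count prefix or separator. For two heads:
#
#     payload = b'\x11' * 20 + b'\x22' * 20   # illustrative node values
#
# read(20) yields one node per call; a short or empty final read marks
# the end of the part.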
2162 2163
2163 2164
2164 2165 @parthandler(b'check:updated-heads')
2165 2166 def handlecheckupdatedheads(op, inpart):
2166 2167 """check for race on the heads touched by a push
2167 2168
2168 2169 This is similar to 'check:heads' but focuses on the heads actually
2169 2170 updated during the push. If other activities happen on unrelated heads,
2170 2171 they are ignored.
2171 2172 
2172 2173 This allows servers with high traffic to avoid push contention as long
2173 2174 as only unrelated parts of the graph are involved."""
2174 2175 h = inpart.read(20)
2175 2176 heads = []
2176 2177 while len(h) == 20:
2177 2178 heads.append(h)
2178 2179 h = inpart.read(20)
2179 2180 assert not h
2180 2181 # trigger a transaction so that we are guaranteed to have the lock now.
2181 2182 if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
2182 2183 op.gettransaction()
2183 2184
2184 2185 currentheads = set()
2185 2186 for ls in op.repo.branchmap().iterheads():
2186 2187 currentheads.update(ls)
2187 2188
2188 2189 for h in heads:
2189 2190 if h not in currentheads:
2190 2191 raise error.PushRaced(
2191 2192 b'remote repository changed while pushing - '
2192 2193 b'please try again'
2193 2194 )
2194 2195
2195 2196
2196 2197 @parthandler(b'check:phases')
2197 2198 def handlecheckphases(op, inpart):
2198 2199 """check that phase boundaries of the repository did not change
2199 2200
2200 2201 This is used to detect a push race.
2201 2202 """
2202 2203 phasetonodes = phases.binarydecode(inpart)
2203 2204 unfi = op.repo.unfiltered()
2204 2205 cl = unfi.changelog
2205 2206 phasecache = unfi._phasecache
2206 2207 msg = (
2207 2208 b'remote repository changed while pushing - please try again '
2208 2209 b'(%s is %s expected %s)'
2209 2210 )
2210 2211 for expectedphase, nodes in enumerate(phasetonodes):
2211 2212 for n in nodes:
2212 2213 actualphase = phasecache.phase(unfi, cl.rev(n))
2213 2214 if actualphase != expectedphase:
2214 2215 finalmsg = msg % (
2215 2216 nodemod.short(n),
2216 2217 phases.phasenames[actualphase],
2217 2218 phases.phasenames[expectedphase],
2218 2219 )
2219 2220 raise error.PushRaced(finalmsg)
2220 2221
2221 2222
2222 2223 @parthandler(b'output')
2223 2224 def handleoutput(op, inpart):
2224 2225 """forward output captured on the server to the client"""
2225 2226 for line in inpart.read().splitlines():
2226 2227 op.ui.status(_(b'remote: %s\n') % line)
2227 2228
2228 2229
2229 2230 @parthandler(b'replycaps')
2230 2231 def handlereplycaps(op, inpart):
2231 2232 """Notify that a reply bundle should be created
2232 2233
2233 2234 The payload contains the capabilities information for the reply"""
2234 2235 caps = decodecaps(inpart.read())
2235 2236 if op.reply is None:
2236 2237 op.reply = bundle20(op.ui, caps)
2237 2238
2238 2239
2239 2240 class AbortFromPart(error.Abort):
2240 2241 """Sub-class of Abort that denotes an error from a bundle2 part."""
2241 2242
2242 2243
2243 2244 @parthandler(b'error:abort', (b'message', b'hint'))
2244 2245 def handleerrorabort(op, inpart):
2245 2246 """Used to transmit abort error over the wire"""
2246 2247 raise AbortFromPart(
2247 2248 inpart.params[b'message'], hint=inpart.params.get(b'hint')
2248 2249 )
2249 2250
2250 2251
2251 2252 @parthandler(
2252 2253 b'error:pushkey',
2253 2254 (b'namespace', b'key', b'new', b'old', b'ret', b'in-reply-to'),
2254 2255 )
2255 2256 def handleerrorpushkey(op, inpart):
2256 2257 """Used to transmit failure of a mandatory pushkey over the wire"""
2257 2258 kwargs = {}
2258 2259 for name in (b'namespace', b'key', b'new', b'old', b'ret'):
2259 2260 value = inpart.params.get(name)
2260 2261 if value is not None:
2261 2262 kwargs[name] = value
2262 2263 raise error.PushkeyFailed(
2263 2264 inpart.params[b'in-reply-to'], **pycompat.strkwargs(kwargs)
2264 2265 )
2265 2266
2266 2267
2267 2268 @parthandler(b'error:unsupportedcontent', (b'parttype', b'params'))
2268 2269 def handleerrorunsupportedcontent(op, inpart):
2269 2270 """Used to transmit unknown content error over the wire"""
2270 2271 kwargs = {}
2271 2272 parttype = inpart.params.get(b'parttype')
2272 2273 if parttype is not None:
2273 2274 kwargs[b'parttype'] = parttype
2274 2275 params = inpart.params.get(b'params')
2275 2276 if params is not None:
2276 2277 kwargs[b'params'] = params.split(b'\0')
2277 2278
2278 2279 raise error.BundleUnknownFeatureError(**pycompat.strkwargs(kwargs))
2279 2280
2280 2281
2281 2282 @parthandler(b'error:pushraced', (b'message',))
2282 2283 def handleerrorpushraced(op, inpart):
2283 2284 """Used to transmit push race error over the wire"""
2284 2285 raise error.ResponseError(_(b'push failed:'), inpart.params[b'message'])
2285 2286
2286 2287
2287 2288 @parthandler(b'listkeys', (b'namespace',))
2288 2289 def handlelistkeys(op, inpart):
2289 2290 """retrieve pushkey namespace content stored in a bundle2"""
2290 2291 namespace = inpart.params[b'namespace']
2291 2292 r = pushkey.decodekeys(inpart.read())
2292 2293 op.records.add(b'listkeys', (namespace, r))
2293 2294
2294 2295
2295 2296 @parthandler(b'pushkey', (b'namespace', b'key', b'old', b'new'))
2296 2297 def handlepushkey(op, inpart):
2297 2298 """process a pushkey request"""
2298 2299 dec = pushkey.decode
2299 2300 namespace = dec(inpart.params[b'namespace'])
2300 2301 key = dec(inpart.params[b'key'])
2301 2302 old = dec(inpart.params[b'old'])
2302 2303 new = dec(inpart.params[b'new'])
2303 2304 # Grab the transaction to ensure that we have the lock before performing the
2304 2305 # pushkey.
2305 2306 if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
2306 2307 op.gettransaction()
2307 2308 ret = op.repo.pushkey(namespace, key, old, new)
2308 2309 record = {b'namespace': namespace, b'key': key, b'old': old, b'new': new}
2309 2310 op.records.add(b'pushkey', record)
2310 2311 if op.reply is not None:
2311 2312 rpart = op.reply.newpart(b'reply:pushkey')
2312 2313 rpart.addparam(
2313 2314 b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
2314 2315 )
2315 2316 rpart.addparam(b'return', b'%i' % ret, mandatory=False)
2316 2317 if inpart.mandatory and not ret:
2317 2318 kwargs = {}
2318 2319 for key in (b'namespace', b'key', b'new', b'old', b'ret'):
2319 2320 if key in inpart.params:
2320 2321 kwargs[key] = inpart.params[key]
2321 2322 raise error.PushkeyFailed(
2322 2323 partid=b'%d' % inpart.id, **pycompat.strkwargs(kwargs)
2323 2324 )
2324 2325
2325 2326
2326 2327 @parthandler(b'bookmarks')
2327 2328 def handlebookmark(op, inpart):
2328 2329 """transmit bookmark information
2329 2330
2330 2331 The part contains binary encoded bookmark information.
2331 2332
2332 2333 The exact behavior of this part can be controlled by the 'bookmarks' mode
2333 2334 on the bundle operation.
2334 2335
2335 2336 When mode is 'apply' (the default) the bookmark information is applied as
2336 2337 is to the unbundling repository. Make sure a 'check:bookmarks' part is
2337 2338 issued earlier to check for push races in such an update. This behavior is
2338 2339 suitable for pushing.
2339 2340
2340 2341 When mode is 'records', the information is recorded into the 'bookmarks'
2341 2342 records of the bundle operation. This behavior is suitable for pulling.
2342 2343 """
2343 2344 changes = bookmarks.binarydecode(inpart)
2344 2345
2345 2346 pushkeycompat = op.repo.ui.configbool(
2346 2347 b'server', b'bookmarks-pushkey-compat'
2347 2348 )
2348 2349 bookmarksmode = op.modes.get(b'bookmarks', b'apply')
2349 2350
2350 2351 if bookmarksmode == b'apply':
2351 2352 tr = op.gettransaction()
2352 2353 bookstore = op.repo._bookmarks
2353 2354 if pushkeycompat:
2354 2355 allhooks = []
2355 2356 for book, node in changes:
2356 2357 hookargs = tr.hookargs.copy()
2357 2358 hookargs[b'pushkeycompat'] = b'1'
2358 2359 hookargs[b'namespace'] = b'bookmarks'
2359 2360 hookargs[b'key'] = book
2360 2361 hookargs[b'old'] = nodemod.hex(bookstore.get(book, b''))
2361 2362 hookargs[b'new'] = nodemod.hex(
2362 2363 node if node is not None else b''
2363 2364 )
2364 2365 allhooks.append(hookargs)
2365 2366
2366 2367 for hookargs in allhooks:
2367 2368 op.repo.hook(
2368 2369 b'prepushkey', throw=True, **pycompat.strkwargs(hookargs)
2369 2370 )
2370 2371
2371 2372 bookstore.applychanges(op.repo, op.gettransaction(), changes)
2372 2373
2373 2374 if pushkeycompat:
2374 2375
2375 2376 def runhook(unused_success):
2376 2377 for hookargs in allhooks:
2377 2378 op.repo.hook(b'pushkey', **pycompat.strkwargs(hookargs))
2378 2379
2379 2380 op.repo._afterlock(runhook)
2380 2381
2381 2382 elif bookmarksmode == b'records':
2382 2383 for book, node in changes:
2383 2384 record = {b'bookmark': book, b'node': node}
2384 2385 op.records.add(b'bookmarks', record)
2385 2386 else:
2386 2387 raise error.ProgrammingError(
2387 2388 b'unknown bookmark mode: %s' % bookmarksmode
2388 2389 )
2389 2390
2390 2391
2391 2392 @parthandler(b'phase-heads')
2392 2393 def handlephases(op, inpart):
2393 2394 """apply phases from bundle part to repo"""
2394 2395 headsbyphase = phases.binarydecode(inpart)
2395 2396 phases.updatephases(op.repo.unfiltered(), op.gettransaction, headsbyphase)
2396 2397
2397 2398
2398 2399 @parthandler(b'reply:pushkey', (b'return', b'in-reply-to'))
2399 2400 def handlepushkeyreply(op, inpart):
2400 2401 """retrieve the result of a pushkey request"""
2401 2402 ret = int(inpart.params[b'return'])
2402 2403 partid = int(inpart.params[b'in-reply-to'])
2403 2404 op.records.add(b'pushkey', {b'return': ret}, partid)
2404 2405
2405 2406
2406 2407 @parthandler(b'obsmarkers')
2407 2408 def handleobsmarker(op, inpart):
2408 2409 """add a stream of obsmarkers to the repo"""
2409 2410 tr = op.gettransaction()
2410 2411 markerdata = inpart.read()
2411 2412 if op.ui.config(b'experimental', b'obsmarkers-exchange-debug'):
2412 2413 op.ui.writenoi18n(
2413 2414 b'obsmarker-exchange: %i bytes received\n' % len(markerdata)
2414 2415 )
2415 2416 # The mergemarkers call will crash if marker creation is not enabled.
2416 2417 # we want to avoid this if the part is advisory.
2417 2418 if not inpart.mandatory and op.repo.obsstore.readonly:
2418 2419 op.repo.ui.debug(
2419 2420 b'ignoring obsolescence markers, feature not enabled\n'
2420 2421 )
2421 2422 return
2422 2423 new = op.repo.obsstore.mergemarkers(tr, markerdata)
2423 2424 op.repo.invalidatevolatilesets()
2424 2425 op.records.add(b'obsmarkers', {b'new': new})
2425 2426 if op.reply is not None:
2426 2427 rpart = op.reply.newpart(b'reply:obsmarkers')
2427 2428 rpart.addparam(
2428 2429 b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
2429 2430 )
2430 2431 rpart.addparam(b'new', b'%i' % new, mandatory=False)
2431 2432
2432 2433
2433 2434 @parthandler(b'reply:obsmarkers', (b'new', b'in-reply-to'))
2434 2435 def handleobsmarkerreply(op, inpart):
2435 2436 """retrieve the result of a pushkey request"""
2436 2437 ret = int(inpart.params[b'new'])
2437 2438 partid = int(inpart.params[b'in-reply-to'])
2438 2439 op.records.add(b'obsmarkers', {b'new': ret}, partid)
2439 2440
2440 2441
2441 2442 @parthandler(b'hgtagsfnodes')
2442 2443 def handlehgtagsfnodes(op, inpart):
2443 2444 """Applies .hgtags fnodes cache entries to the local repo.
2444 2445
2445 2446 Payload is pairs of 20 byte changeset nodes and filenodes.
2446 2447 """
2447 2448 # Grab the transaction so we ensure that we have the lock at this point.
2448 2449 if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
2449 2450 op.gettransaction()
2450 2451 cache = tags.hgtagsfnodescache(op.repo.unfiltered())
2451 2452
2452 2453 count = 0
2453 2454 while True:
2454 2455 node = inpart.read(20)
2455 2456 fnode = inpart.read(20)
2456 2457 if len(node) < 20 or len(fnode) < 20:
2457 2458 op.ui.debug(b'ignoring incomplete received .hgtags fnodes data\n')
2458 2459 break
2459 2460 cache.setfnode(node, fnode)
2460 2461 count += 1
2461 2462
2462 2463 cache.write()
2463 2464 op.ui.debug(b'applied %i hgtags fnodes cache entries\n' % count)
2464 2465
2465 2466
2466 2467 rbcstruct = struct.Struct(b'>III')
2467 2468
2468 2469
2469 2470 @parthandler(b'cache:rev-branch-cache')
2470 2471 def handlerbc(op, inpart):
2471 2472 """receive a rev-branch-cache payload and update the local cache
2472 2473
2473 2474 The payload is a series of records, one per branch:
2474 2475
2475 2476 1) branch name length
2476 2477 2) number of open heads
2477 2478 3) number of closed heads
2478 2479 4) open heads nodes
2479 2480 5) closed heads nodes
2480 2481 """
2481 2482 total = 0
2482 2483 rawheader = inpart.read(rbcstruct.size)
2483 2484 cache = op.repo.revbranchcache()
2484 2485 cl = op.repo.unfiltered().changelog
2485 2486 while rawheader:
2486 2487 header = rbcstruct.unpack(rawheader)
2487 2488 total += header[1] + header[2]
2488 2489 utf8branch = inpart.read(header[0])
2489 2490 branch = encoding.tolocal(utf8branch)
2490 2491 for x in pycompat.xrange(header[1]):
2491 2492 node = inpart.read(20)
2492 2493 rev = cl.rev(node)
2493 2494 cache.setdata(branch, rev, node, False)
2494 2495 for x in pycompat.xrange(header[2]):
2495 2496 node = inpart.read(20)
2496 2497 rev = cl.rev(node)
2497 2498 cache.setdata(branch, rev, node, True)
2498 2499 rawheader = inpart.read(rbcstruct.size)
2499 2500 cache.write()
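
# A sketch of one record in the payload decoded above (illustrative
# branch name and node value): a >III header giving the branch name
# length and the open/closed head counts, then the name and the nodes.
#
#     header = rbcstruct.pack(len(b'default'), 1, 0)
#     record = header + b'default' + b'\x11' * 20   # one open head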
2500 2501
2501 2502
2502 2503 @parthandler(b'pushvars')
2503 2504 def bundle2getvars(op, part):
2504 2505 '''unbundle a bundle2 containing shellvars on the server'''
2505 2506 # An option to disable unbundling on server-side for security reasons
2506 2507 if op.ui.configbool(b'push', b'pushvars.server'):
2507 2508 hookargs = {}
2508 2509 for key, value in part.advisoryparams:
2509 2510 key = key.upper()
2510 2511 # We want pushed variables to have USERVAR_ prepended so we know
2511 2512 # they came from the --pushvar flag.
2512 2513 key = b"USERVAR_" + key
2513 2514 hookargs[key] = value
2514 2515 op.addhookargs(hookargs)
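
# For example, a hypothetical variable pushed as '--pushvar DEBUG=1'
# arriving as the advisory param (b'debug', b'1') would be exposed to
# hooks as:
#
#     hookargs == {b'USERVAR_DEBUG': b'1'}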
2515 2516
2516 2517
2517 2518 @parthandler(b'stream2', (b'requirements', b'filecount', b'bytecount'))
2518 2519 def handlestreamv2bundle(op, part):
2519 2520
2520 2521 requirements = urlreq.unquote(part.params[b'requirements']).split(b',')
2521 2522 filecount = int(part.params[b'filecount'])
2522 2523 bytecount = int(part.params[b'bytecount'])
2523 2524
2524 2525 repo = op.repo
2525 2526 if len(repo):
2526 2527 msg = _(b'cannot apply stream clone to non-empty repository')
2527 2528 raise error.Abort(msg)
2528 2529
2529 2530 repo.ui.debug(b'applying stream bundle\n')
2530 2531 streamclone.applybundlev2(repo, part, filecount, bytecount, requirements)
2531 2532
2532 2533
2533 2534 def widen_bundle(
2534 2535 bundler, repo, oldmatcher, newmatcher, common, known, cgversion, ellipses
2535 2536 ):
2536 2537 """generates bundle2 for widening a narrow clone
2537 2538
2538 2539 bundler is the bundle to which data should be added
2539 2540 repo is the localrepository instance
2540 2541 oldmatcher matches what the client already has
2541 2542 newmatcher matches what the client needs (including what it already has)
2542 2543 common is set of common heads between server and client
2543 2544 known is a set of revs known on the client side (used in ellipses)
2544 2545 cgversion is the changegroup version to send
2545 2546 ellipses is a boolean value telling whether to send ellipses data or not
2546 2547
2547 2548 returns a bundle2 containing the data required for extending
2548 2549 """
2549 2550 commonnodes = set()
2550 2551 cl = repo.changelog
2551 2552 for r in repo.revs(b"::%ln", common):
2552 2553 commonnodes.add(cl.node(r))
2553 2554 if commonnodes:
2554 2555 # XXX: we should only send the filelogs (and treemanifest). user
2555 2556 # already has the changelog and manifest
2556 2557 packer = changegroup.getbundler(
2557 2558 cgversion,
2558 2559 repo,
2559 2560 oldmatcher=oldmatcher,
2560 2561 matcher=newmatcher,
2561 2562 fullnodes=commonnodes,
2562 2563 )
2563 2564 cgdata = packer.generate(
2564 2565 {nodemod.nullid},
2565 2566 list(commonnodes),
2566 2567 False,
2567 2568 b'narrow_widen',
2568 2569 changelog=False,
2569 2570 )
2570 2571
2571 2572 part = bundler.newpart(b'changegroup', data=cgdata)
2572 2573 part.addparam(b'version', cgversion)
2573 2574 if b'treemanifest' in repo.requirements:
2574 2575 part.addparam(b'treemanifest', b'1')
2575 2576 if b'exp-sidedata-flag' in repo.requirements:
2576 2577 part.addparam(b'exp-sidedata', b'1')
2577 2578
2578 2579 return bundler
@@ -1,467 +1,467 b''
1 1 # linelog - efficient cache for annotate data
2 2 #
3 3 # Copyright 2018 Google LLC.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
8 8
9 9 SCCS Weaves are an implementation of
10 10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
11 11 mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves
12 12 and how linelog works in detail.
13 13
14 14 Here's a hacker's summary: a linelog is a program which is executed in
15 15 the context of a revision. Executing the program emits information
16 16 about lines, including the revision that introduced them and the line
17 17 number in the file at the introducing revision. When an insertion or
18 18 deletion is performed on the file, a jump instruction is used to patch
19 19 in a new body of annotate information.
20 20 """
21 21 from __future__ import absolute_import, print_function
22 22
23 23 import abc
24 24 import struct
25 25
26 26 from .thirdparty import attr
27 27 from . import pycompat
28 28
29 29 _llentry = struct.Struct(b'>II')
30 30
31 31
32 32 class LineLogError(Exception):
33 33 """Error raised when something bad happens internally in linelog."""
34 34
35 35
36 36 @attr.s
37 37 class lineinfo(object):
38 38 # Introducing revision of this line.
39 39 rev = attr.ib()
40 40 # Line number for this line in its introducing revision.
41 41 linenum = attr.ib()
42 42 # Private. Offset in the linelog program of this line. Used internally.
43 43 _offset = attr.ib()
44 44
45 45
46 46 @attr.s
47 47 class annotateresult(object):
48 48 rev = attr.ib()
49 49 lines = attr.ib()
50 50 _eof = attr.ib()
51 51
52 52 def __iter__(self):
53 53 return iter(self.lines)
54 54
55 55
56 56 class _llinstruction(object): # pytype: disable=ignored-metaclass
57 57
58 58 __metaclass__ = abc.ABCMeta
59 59
60 60 @abc.abstractmethod
61 61 def __init__(self, op1, op2):
62 62 pass
63 63
64 64 @abc.abstractmethod
65 65 def __str__(self):
66 66 pass
67 67
68 68 def __repr__(self):
69 69 return str(self)
70 70
71 71 @abc.abstractmethod
72 72 def __eq__(self, other):
73 73 pass
74 74
75 75 @abc.abstractmethod
76 76 def encode(self):
77 77 """Encode this instruction to the binary linelog format."""
78 78
79 79 @abc.abstractmethod
80 80 def execute(self, rev, pc, emit):
81 81 """Execute this instruction.
82 82
83 83 Args:
84 84 rev: The revision we're annotating.
85 85 pc: The current offset in the linelog program.
86 86 emit: A function that accepts a single lineinfo object.
87 87
88 88 Returns:
89 89 The new value of pc. Returns None if execution should stop
90 90 (that is, we've found the end of the file.)
91 91 """
92 92
93 93
94 94 class _jge(_llinstruction):
95 95 """If the current rev is greater than or equal to op1, jump to op2."""
96 96
97 97 def __init__(self, op1, op2):
98 98 self._cmprev = op1
99 99 self._target = op2
100 100
101 101 def __str__(self):
102 102 return 'JGE %d %d' % (self._cmprev, self._target)
103 103
104 104 def __eq__(self, other):
105 105 return (
106 106 type(self) == type(other)
107 107 and self._cmprev == other._cmprev
108 108 and self._target == other._target
109 109 )
110 110
111 111 def encode(self):
112 112 return _llentry.pack(self._cmprev << 2, self._target)
113 113
114 114 def execute(self, rev, pc, emit):
115 115 if rev >= self._cmprev:
116 116 return self._target
117 117 return pc + 1
118 118
119 119
120 120 class _jump(_llinstruction):
121 121 """Unconditional jumps are expressed as a JGE with op1 set to 0."""
122 122
123 123 def __init__(self, op1, op2):
124 124 if op1 != 0:
125 125 raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
126 126 self._target = op2
127 127
128 128 def __str__(self):
129 129 return 'JUMP %d' % (self._target)
130 130
131 131 def __eq__(self, other):
132 132 return type(self) == type(other) and self._target == other._target
133 133
134 134 def encode(self):
135 135 return _llentry.pack(0, self._target)
136 136
137 137 def execute(self, rev, pc, emit):
138 138 return self._target
139 139
140 140
141 141 class _eof(_llinstruction):
142 142 """EOF is expressed as a JGE that always jumps to 0."""
143 143
144 144 def __init__(self, op1, op2):
145 145 if op1 != 0:
146 146 raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
147 147 if op2 != 0:
148 148 raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)
149 149
150 150 def __str__(self):
151 151 return r'EOF'
152 152
153 153 def __eq__(self, other):
154 154 return type(self) == type(other)
155 155
156 156 def encode(self):
157 157 return _llentry.pack(0, 0)
158 158
159 159 def execute(self, rev, pc, emit):
160 160 return None
161 161
162 162
163 163 class _jl(_llinstruction):
164 164 """If the current rev is less than op1, jump to op2."""
165 165
166 166 def __init__(self, op1, op2):
167 167 self._cmprev = op1
168 168 self._target = op2
169 169
170 170 def __str__(self):
171 171 return 'JL %d %d' % (self._cmprev, self._target)
172 172
173 173 def __eq__(self, other):
174 174 return (
175 175 type(self) == type(other)
176 176 and self._cmprev == other._cmprev
177 177 and self._target == other._target
178 178 )
179 179
180 180 def encode(self):
181 181 return _llentry.pack(1 | (self._cmprev << 2), self._target)
182 182
183 183 def execute(self, rev, pc, emit):
184 184 if rev < self._cmprev:
185 185 return self._target
186 186 return pc + 1
187 187
188 188
189 189 class _line(_llinstruction):
190 190 """Emit a line."""
191 191
192 192 def __init__(self, op1, op2):
193 193 # This line was introduced by this revision number.
194 194 self._rev = op1
195 195 # This line had the specified line number in the introducing revision.
196 196 self._origlineno = op2
197 197
198 198 def __str__(self):
199 199 return 'LINE %d %d' % (self._rev, self._origlineno)
200 200
201 201 def __eq__(self, other):
202 202 return (
203 203 type(self) == type(other)
204 204 and self._rev == other._rev
205 205 and self._origlineno == other._origlineno
206 206 )
207 207
208 208 def encode(self):
209 209 return _llentry.pack(2 | (self._rev << 2), self._origlineno)
210 210
211 211 def execute(self, rev, pc, emit):
212 212 emit(lineinfo(self._rev, self._origlineno, pc))
213 213 return pc + 1
214 214
215 215
216 216 def _decodeone(data, offset):
217 217 """Decode a single linelog instruction from an offset in a buffer."""
218 218 try:
219 219 op1, op2 = _llentry.unpack_from(data, offset)
220 220 except struct.error as e:
221 221 raise LineLogError(b'reading an instruction failed: %r' % e)
222 222 opcode = op1 & 0b11
223 223 op1 = op1 >> 2
224 224 if opcode == 0:
225 225 if op1 == 0:
226 226 if op2 == 0:
227 227 return _eof(op1, op2)
228 228 return _jump(op1, op2)
229 229 return _jge(op1, op2)
230 230 elif opcode == 1:
231 231 return _jl(op1, op2)
232 232 elif opcode == 2:
233 233 return _line(op1, op2)
234 234 raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
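
# For example, round-tripping an instruction through the encoding above
# (a LINE entry keeps opcode 2 in the low two bits of op1 and the
# revision in the high bits):
#
#     _decodeone(_line(7, 3).encode(), 0) == _line(7, 3)   # -> True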
235 235
236 236
237 237 class linelog(object):
238 238 """Efficient cache for per-line history information."""
239 239
240 240 def __init__(self, program=None, maxrev=0):
241 241 if program is None:
242 242 # We pad the program with an extra leading EOF so that our
243 243 # offsets will match the C code exactly. This means we can
244 244 # interoperate with the C code.
245 245 program = [_eof(0, 0), _eof(0, 0)]
246 246 self._program = program
247 247 self._lastannotate = None
248 248 self._maxrev = maxrev
249 249
250 250 def __eq__(self, other):
251 251 return (
252 252 type(self) == type(other)
253 253 and self._program == other._program
254 254 and self._maxrev == other._maxrev
255 255 )
256 256
257 257 def __repr__(self):
258 return b'<linelog at %s: maxrev=%d size=%d>' % (
258 return '<linelog at %s: maxrev=%d size=%d>' % (
259 259 hex(id(self)),
260 260 self._maxrev,
261 261 len(self._program),
262 262 )
263 263
264 264 def debugstr(self):
265 265 fmt = '%%%dd %%s' % len(str(len(self._program)))
266 266 return pycompat.sysstr(b'\n').join(
267 267 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
268 268 )
269 269
270 270 @classmethod
271 271 def fromdata(cls, buf):
272 272 if len(buf) % _llentry.size != 0:
273 273 raise LineLogError(
274 274 b"invalid linelog buffer size %d (must be a multiple of %d)"
275 275 % (len(buf), _llentry.size)
276 276 )
277 277 expected = len(buf) / _llentry.size
278 278 fakejge = _decodeone(buf, 0)
279 279 if isinstance(fakejge, _jump):
280 280 maxrev = 0
281 281 elif isinstance(fakejge, (_jge, _jl)):
282 282 maxrev = fakejge._cmprev
283 283 else:
284 284 raise LineLogError(
285 285 'Expected one of _jump, _jge, or _jl. Got %s.'
286 286 % type(fakejge).__name__
287 287 )
288 288 assert isinstance(fakejge, (_jump, _jge, _jl)) # help pytype
289 289 numentries = fakejge._target
290 290 if expected != numentries:
291 291 raise LineLogError(
292 292 b"corrupt linelog data: claimed"
293 293 b" %d entries but given data for %d entries"
294 294 % (numentries, expected)
295 295 )
296 296 instructions = [_eof(0, 0)]
297 297 for offset in pycompat.xrange(1, numentries):
298 298 instructions.append(_decodeone(buf, offset * _llentry.size))
299 299 return cls(instructions, maxrev=maxrev)
300 300
301 301 def encode(self):
302 302 hdr = _jge(self._maxrev, len(self._program)).encode()
303 303 return hdr + b''.join(i.encode() for i in self._program[1:])
304 304
305 305 def clear(self):
306 306 self._program = []
307 307 self._maxrev = 0
308 308 self._lastannotate = None
309 309
310 310 def replacelines_vec(self, rev, a1, a2, blines):
311 311 return self.replacelines(
312 312 rev, a1, a2, 0, len(blines), _internal_blines=blines
313 313 )
314 314
315 315 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
316 316 """Replace lines [a1, a2) with lines [b1, b2)."""
317 317 if self._lastannotate:
318 318 # TODO(augie): make replacelines() accept a revision at
319 319 # which we're editing as well as a revision to mark
320 320 # responsible for the edits. In hg-experimental it's
321 321 # stateful like this, so we're doing the same thing to
322 322 # retain compatibility with absorb until that's imported.
323 323 ar = self._lastannotate
324 324 else:
325 325 ar = self.annotate(rev)
326 326 # ar = self.annotate(self._maxrev)
327 327 if a1 > len(ar.lines):
328 328 raise LineLogError(
329 329 b'%d contains %d lines, tried to access line %d'
330 330 % (rev, len(ar.lines), a1)
331 331 )
332 332 elif a1 == len(ar.lines):
333 333 # Simulated EOF instruction since we're at EOF, which
334 334 # doesn't have a "real" line.
335 335 a1inst = _eof(0, 0)
336 336 a1info = lineinfo(0, 0, ar._eof)
337 337 else:
338 338 a1info = ar.lines[a1]
339 339 a1inst = self._program[a1info._offset]
340 340 programlen = self._program.__len__
341 341 oldproglen = programlen()
342 342 appendinst = self._program.append
343 343
344 344 # insert
345 345 blineinfos = []
346 346 bappend = blineinfos.append
347 347 if b1 < b2:
348 348 # Determine the jump target for the JGE at the start of
349 349 # the new block.
350 350 tgt = oldproglen + (b2 - b1 + 1)
351 351 # Jump to skip the insert if we're at an older revision.
352 352 appendinst(_jl(rev, tgt))
353 353 for linenum in pycompat.xrange(b1, b2):
354 354 if _internal_blines is None:
355 355 bappend(lineinfo(rev, linenum, programlen()))
356 356 appendinst(_line(rev, linenum))
357 357 else:
358 358 newrev, newlinenum = _internal_blines[linenum]
359 359 bappend(lineinfo(newrev, newlinenum, programlen()))
360 360 appendinst(_line(newrev, newlinenum))
361 361 # delete
362 362 if a1 < a2:
363 363 if a2 > len(ar.lines):
364 364 raise LineLogError(
365 365 b'%d contains %d lines, tried to access line %d'
366 366 % (rev, len(ar.lines), a2)
367 367 )
368 368 elif a2 == len(ar.lines):
369 369 endaddr = ar._eof
370 370 else:
371 371 endaddr = ar.lines[a2]._offset
372 372 if a2 > 0 and rev < self._maxrev:
373 373 # If we're here, we're deleting a chunk of an old
374 374 # commit, so we need to be careful and not touch
375 375 # invisible lines between a2-1 and a2 (IOW, lines that
376 376 # are added later).
377 377 endaddr = ar.lines[a2 - 1]._offset + 1
378 378 appendinst(_jge(rev, endaddr))
379 379 # copy instruction from a1
380 380 a1instpc = programlen()
381 381 appendinst(a1inst)
382 382 # if a1inst isn't a jump or EOF, then we need to add an unconditional
383 383 # jump back into the program here.
384 384 if not isinstance(a1inst, (_jump, _eof)):
385 385 appendinst(_jump(0, a1info._offset + 1))
386 386 # Patch instruction at a1, which makes our patch live.
387 387 self._program[a1info._offset] = _jump(0, oldproglen)
388 388
389 389 # Update self._lastannotate in place. This serves as a cache to avoid
390 390 # expensive "self.annotate" in this function, when "replacelines" is
391 391 # used continuously.
392 392 if len(self._lastannotate.lines) > a1:
393 393 self._lastannotate.lines[a1]._offset = a1instpc
394 394 else:
395 395 assert isinstance(a1inst, _eof)
396 396 self._lastannotate._eof = a1instpc
397 397 self._lastannotate.lines[a1:a2] = blineinfos
398 398 self._lastannotate.rev = max(self._lastannotate.rev, rev)
399 399
400 400 if rev > self._maxrev:
401 401 self._maxrev = rev
402 402
403 403 def annotate(self, rev):
404 404 pc = 1
405 405 lines = []
406 406 executed = 0
407 407 # Sanity check: if the number of instructions executed exceeds
408 408 # len(program), we somehow hit an infinite loop in the linelog
409 409 # program and should stop.
410 410 while pc is not None and executed < len(self._program):
411 411 inst = self._program[pc]
412 412 lastpc = pc
413 413 pc = inst.execute(rev, pc, lines.append)
414 414 executed += 1
415 415 if pc is not None:
416 416 raise LineLogError(
417 417 'Probably hit an infinite loop in linelog. Program:\n'
418 418 + self.debugstr()
419 419 )
420 420 ar = annotateresult(rev, lines, lastpc)
421 421 self._lastannotate = ar
422 422 return ar
423 423
424 424 @property
425 425 def maxrev(self):
426 426 return self._maxrev
427 427
428 428 # Stateful methods which depend on the value of the last
429 429 # annotation run. This API is for compatibility with the original
430 430 # linelog, and we should probably consider refactoring it.
431 431 @property
432 432 def annotateresult(self):
433 433 """Return the last annotation result. C linelog code exposed this."""
434 434 return [(l.rev, l.linenum) for l in self._lastannotate.lines]
435 435
436 436 def getoffset(self, line):
437 437 return self._lastannotate.lines[line]._offset
438 438
439 439 def getalllines(self, start=0, end=0):
440 440 """Get all lines that ever occurred in [start, end).
441 441
442 442 Passing start == end == 0 means "all lines ever".
443 443
444 444 This works in terms of *internal* program offsets, not line numbers.
445 445 """
446 446 pc = start or 1
447 447 lines = []
448 448 # only take as many steps as there are instructions in the
449 449 # program - if we don't find an EOF or our stop-line before
450 450 # then, something is badly broken.
451 451 for step in pycompat.xrange(len(self._program)):
452 452 inst = self._program[pc]
453 453 nextpc = pc + 1
454 454 if isinstance(inst, _jump):
455 455 nextpc = inst._target
456 456 elif isinstance(inst, _eof):
457 457 return lines
458 458 elif isinstance(inst, (_jl, _jge)):
459 459 pass
460 460 elif isinstance(inst, _line):
461 461 lines.append((inst._rev, inst._origlineno))
462 462 else:
463 463 raise LineLogError(b"Illegal instruction %r" % inst)
464 464 if nextpc == end:
465 465 return lines
466 466 pc = nextpc
467 467 raise LineLogError(b"Failed to perform getalllines")
@@ -1,2273 +1,2275 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 encoding,
24 25 error,
25 26 mdiff,
26 27 pathutil,
27 28 policy,
28 29 pycompat,
29 30 revlog,
30 31 util,
31 32 )
32 33 from .interfaces import (
33 34 repository,
34 35 util as interfaceutil,
35 36 )
36 37
37 38 parsers = policy.importmod('parsers')
38 39 propertycache = util.propertycache
39 40
40 41 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
41 42 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
42 43
43 44
44 45 def _parse(data):
45 46 # This method does a little bit of excessive-looking
46 47 # precondition checking. This is so that the behavior of this
47 48 # class exactly matches its C counterpart to try and help
48 49 # prevent surprise breakage for anyone that develops against
49 50 # the pure version.
50 51 if data and data[-1:] != b'\n':
51 52 raise ValueError(b'Manifest did not end in a newline.')
52 53 prev = None
53 54 for l in data.splitlines():
54 55 if prev is not None and prev > l:
55 56 raise ValueError(b'Manifest lines not in sorted order.')
56 57 prev = l
57 58 f, n = l.split(b'\0')
58 59 if len(n) > 40:
59 60 yield f, bin(n[:40]), n[40:]
60 61 else:
61 62 yield f, bin(n), b''
62 63
63 64
64 65 def _text(it):
65 66 files = []
66 67 lines = []
67 68 for f, n, fl in it:
68 69 files.append(f)
69 70 # if this is changed to support newlines in filenames,
70 71 # be sure to check the templates/ dir again (especially *-raw.tmpl)
71 72 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
72 73
73 74 _checkforbidden(files)
74 75 return b''.join(lines)
75 76
76 77
77 78 class lazymanifestiter(object):
78 79 def __init__(self, lm):
79 80 self.pos = 0
80 81 self.lm = lm
81 82
82 83 def __iter__(self):
83 84 return self
84 85
85 86 def next(self):
86 87 try:
87 88 data, pos = self.lm._get(self.pos)
88 89 except IndexError:
89 90 raise StopIteration
90 91 if pos == -1:
91 92 self.pos += 1
92 93 return data[0]
93 94 self.pos += 1
94 95 zeropos = data.find(b'\x00', pos)
95 96 return data[pos:zeropos]
96 97
97 98 __next__ = next
98 99
99 100
100 101 class lazymanifestiterentries(object):
101 102 def __init__(self, lm):
102 103 self.lm = lm
103 104 self.pos = 0
104 105
105 106 def __iter__(self):
106 107 return self
107 108
108 109 def next(self):
109 110 try:
110 111 data, pos = self.lm._get(self.pos)
111 112 except IndexError:
112 113 raise StopIteration
113 114 if pos == -1:
114 115 self.pos += 1
115 116 return data
116 117 zeropos = data.find(b'\x00', pos)
117 118 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
118 119 flags = self.lm._getflags(data, self.pos, zeropos)
119 120 self.pos += 1
120 121 return (data[pos:zeropos], hashval, flags)
121 122
122 123 __next__ = next
123 124
124 125
125 126 def unhexlify(data, extra, pos, length):
126 127 s = bin(data[pos : pos + length])
127 128 if extra:
128 129 s += chr(extra & 0xFF)
129 130 return s
130 131
131 132
132 133 def _cmp(a, b):
133 134 return (a > b) - (a < b)
134 135
135 136
136 137 class _lazymanifest(object):
137 138 """A pure python manifest backed by a byte string. It is supplimented with
138 139 internal lists as it is modified, until it is compacted back to a pure byte
139 140 string.
140 141
141 142 ``data`` is the initial manifest data.
142 143
143 144 ``positions`` is a list of offsets, one per manifest entry. Positive
144 145 values are offsets into ``data``, negative values are offsets into the
145 146 ``extradata`` list. When an entry is removed, its entry is dropped from
146 147 ``positions``. The values are encoded such that when walking the list and
147 148 indexing into ``data`` or ``extradata`` as appropriate, the entries are
148 149 sorted by filename.
149 150
150 151 ``extradata`` is a list of (key, hash, flags) for entries that were added or
151 152 modified since the manifest was created or compacted.
152 153 """
153 154
154 155 def __init__(
155 156 self,
156 157 data,
157 158 positions=None,
158 159 extrainfo=None,
159 160 extradata=None,
160 161 hasremovals=False,
161 162 ):
162 163 if positions is None:
163 164 self.positions = self.findlines(data)
164 165 self.extrainfo = [0] * len(self.positions)
165 166 self.data = data
166 167 self.extradata = []
167 168 self.hasremovals = False
168 169 else:
169 170 self.positions = positions[:]
170 171 self.extrainfo = extrainfo[:]
171 172 self.extradata = extradata[:]
172 173 self.data = data
173 174 self.hasremovals = hasremovals
174 175
175 176 def findlines(self, data):
176 177 if not data:
177 178 return []
178 179 pos = data.find(b"\n")
179 180 if pos == -1 or data[-1:] != b'\n':
180 181 raise ValueError(b"Manifest did not end in a newline.")
181 182 positions = [0]
182 183 prev = data[: data.find(b'\x00')]
183 184 while pos < len(data) - 1 and pos != -1:
184 185 positions.append(pos + 1)
185 186 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
186 187 if nexts < prev:
187 188 raise ValueError(b"Manifest lines not in sorted order.")
188 189 prev = nexts
189 190 pos = data.find(b"\n", pos + 1)
190 191 return positions
191 192
192 193 def _get(self, index):
193 194 # get the position encoded in pos:
194 195 # positive number is an index in 'data'
195 196 # negative number is in extrapieces
196 197 pos = self.positions[index]
197 198 if pos >= 0:
198 199 return self.data, pos
199 200 return self.extradata[-pos - 1], -1
200 201
201 202 def _getkey(self, pos):
202 203 if pos >= 0:
203 204 return self.data[pos : self.data.find(b'\x00', pos + 1)]
204 205 return self.extradata[-pos - 1][0]
205 206
206 207 def bsearch(self, key):
207 208 first = 0
208 209 last = len(self.positions) - 1
209 210
210 211 while first <= last:
211 212 midpoint = (first + last) // 2
212 213 nextpos = self.positions[midpoint]
213 214 candidate = self._getkey(nextpos)
214 215 r = _cmp(key, candidate)
215 216 if r == 0:
216 217 return midpoint
217 218 else:
218 219 if r < 0:
219 220 last = midpoint - 1
220 221 else:
221 222 first = midpoint + 1
222 223 return -1
223 224
224 225 def bsearch2(self, key):
225 226 # same as the above, but will always return the position
226 227 # (a separate function rather than a flag on bsearch, for performance)
227 228 first = 0
228 229 last = len(self.positions) - 1
229 230
230 231 while first <= last:
231 232 midpoint = (first + last) // 2
232 233 nextpos = self.positions[midpoint]
233 234 candidate = self._getkey(nextpos)
234 235 r = _cmp(key, candidate)
235 236 if r == 0:
236 237 return (midpoint, True)
237 238 else:
238 239 if r < 0:
239 240 last = midpoint - 1
240 241 else:
241 242 first = midpoint + 1
242 243 return (first, False)
243 244
244 245 def __contains__(self, key):
245 246 return self.bsearch(key) != -1
246 247
247 248 def _getflags(self, data, needle, pos):
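        # pos is the index of the entry's b'\x00' separator; skip it plus
        # the 40 hex digits of the node hash to land on the flags field.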
248 249 start = pos + 41
249 250 end = data.find(b"\n", start)
250 251 if end == -1:
251 252 end = len(data) - 1
252 253 if start == end:
253 254 return b''
254 255 return self.data[start:end]
255 256
256 257 def __getitem__(self, key):
257 258 if not isinstance(key, bytes):
258 259 raise TypeError(b"getitem: manifest keys must be a bytes.")
259 260 needle = self.bsearch(key)
260 261 if needle == -1:
261 262 raise KeyError
262 263 data, pos = self._get(needle)
263 264 if pos == -1:
264 265 return (data[1], data[2])
265 266 zeropos = data.find(b'\x00', pos)
266 267 assert 0 <= needle <= len(self.positions)
267 268 assert len(self.extrainfo) == len(self.positions)
268 269 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
269 270 flags = self._getflags(data, needle, zeropos)
270 271 return (hashval, flags)
271 272
272 273 def __delitem__(self, key):
273 274 needle, found = self.bsearch2(key)
274 275 if not found:
275 276 raise KeyError
276 277 cur = self.positions[needle]
277 278 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
278 279 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
279 280 if cur >= 0:
280 281 # This does NOT unsort the list as far as the search functions are
281 282 # concerned, as they only examine lines mapped by self.positions.
282 283 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
283 284 self.hasremovals = True
284 285
285 286 def __setitem__(self, key, value):
286 287 if not isinstance(key, bytes):
287 288 raise TypeError(b"setitem: manifest keys must be a byte string.")
288 289 if not isinstance(value, tuple) or len(value) != 2:
289 290 raise TypeError(
290 291 b"Manifest values must be a tuple of (node, flags)."
291 292 )
292 293 hashval = value[0]
293 294 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
294 295 raise TypeError(b"node must be a 20-byte byte string")
295 296 flags = value[1]
296 297 if len(hashval) == 22:
297 298 hashval = hashval[:-1]
298 299 if not isinstance(flags, bytes) or len(flags) > 1:
299 300 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
300 301 needle, found = self.bsearch2(key)
301 302 if found:
302 303 # put the item
303 304 pos = self.positions[needle]
304 305 if pos < 0:
305 306 self.extradata[-pos - 1] = (key, hashval, value[1])
306 307 else:
307 308 # just don't bother editing data in place; stash the update in extradata
308 309 self.extradata.append((key, hashval, value[1]))
309 310 self.positions[needle] = -len(self.extradata)
310 311 else:
311 312 # not found, put it in with extra positions
312 313 self.extradata.append((key, hashval, value[1]))
313 314 self.positions = (
314 315 self.positions[:needle]
315 316 + [-len(self.extradata)]
316 317 + self.positions[needle:]
317 318 )
318 319 self.extrainfo = (
319 320 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
320 321 )
321 322
322 323 def copy(self):
323 324 # XXX call _compact like in C?
324 325 return _lazymanifest(
325 326 self.data,
326 327 self.positions,
327 328 self.extrainfo,
328 329 self.extradata,
329 330 self.hasremovals,
330 331 )
331 332
332 333 def _compact(self):
333 334 # hopefully not called TOO often
334 335 if len(self.extradata) == 0 and not self.hasremovals:
335 336 return
336 337 l = []
337 338 i = 0
338 339 offset = 0
339 340 self.extrainfo = [0] * len(self.positions)
340 341 while i < len(self.positions):
341 342 if self.positions[i] >= 0:
342 343 cur = self.positions[i]
343 344 last_cut = cur
344 345
345 346 # Collect all contiguous entries in the buffer at the current
346 347 # offset, breaking out only for added/modified items held in
347 348 # extradata, or a deleted line prior to the next position.
348 349 while True:
349 350 self.positions[i] = offset
350 351 i += 1
351 352 if i == len(self.positions) or self.positions[i] < 0:
352 353 break
353 354
354 355 # A removed file has no positions[] entry, but does have an
355 356 # overwritten first byte. Break out and find the end of the
356 357 # current good entry/entries if there is a removed file
357 358 # before the next position.
358 359 if (
359 360 self.hasremovals
360 361 and self.data.find(b'\n\x00', cur, self.positions[i])
361 362 != -1
362 363 ):
363 364 break
364 365
365 366 offset += self.positions[i] - cur
366 367 cur = self.positions[i]
367 368 end_cut = self.data.find(b'\n', cur)
368 369 if end_cut != -1:
369 370 end_cut += 1
370 371 offset += end_cut - cur
371 372 l.append(self.data[last_cut:end_cut])
372 373 else:
373 374 while i < len(self.positions) and self.positions[i] < 0:
374 375 cur = self.positions[i]
375 376 t = self.extradata[-cur - 1]
376 377 l.append(self._pack(t))
377 378 self.positions[i] = offset
378 379 if len(t[1]) > 20:
379 380 self.extrainfo[i] = ord(t[1][21])
380 381 offset += len(l[-1])
381 382 i += 1
382 383 self.data = b''.join(l)
383 384 self.hasremovals = False
384 385 self.extradata = []
385 386
386 387 def _pack(self, d):
387 388 return d[0] + b'\x00' + hex(d[1][:20]) + d[2] + b'\n'
388 389
389 390 def text(self):
390 391 self._compact()
391 392 return self.data
392 393
393 394 def diff(self, m2, clean=False):
394 395 '''Finds changes between the current manifest and m2.'''
395 396 # XXX think whether efficiency matters here
396 397 diff = {}
397 398
398 399 for fn, e1, flags in self.iterentries():
399 400 if fn not in m2:
400 401 diff[fn] = (e1, flags), (None, b'')
401 402 else:
402 403 e2 = m2[fn]
403 404 if (e1, flags) != e2:
404 405 diff[fn] = (e1, flags), e2
405 406 elif clean:
406 407 diff[fn] = None
407 408
408 409 for fn, e2, flags in m2.iterentries():
409 410 if fn not in self:
410 411 diff[fn] = (None, b''), (e2, flags)
411 412
412 413 return diff
413 414
414 415 def iterentries(self):
415 416 return lazymanifestiterentries(self)
416 417
417 418 def iterkeys(self):
418 419 return lazymanifestiter(self)
419 420
420 421 def __iter__(self):
421 422 return lazymanifestiter(self)
422 423
423 424 def __len__(self):
424 425 return len(self.positions)
425 426
426 427 def filtercopy(self, filterfn):
427 428 # XXX should be optimized
428 429 c = _lazymanifest(b'')
429 430 for f, n, fl in self.iterentries():
430 431 if filterfn(f):
431 432 c[f] = n, fl
432 433 return c
433 434
434 435
435 436 try:
436 437 _lazymanifest = parsers.lazymanifest
437 438 except AttributeError:
438 439 pass
439 440
440 441
441 442 @interfaceutil.implementer(repository.imanifestdict)
442 443 class manifestdict(object):
443 444 def __init__(self, data=b''):
444 445 self._lm = _lazymanifest(data)
445 446
446 447 def __getitem__(self, key):
447 448 return self._lm[key][0]
448 449
449 450 def find(self, key):
450 451 return self._lm[key]
451 452
452 453 def __len__(self):
453 454 return len(self._lm)
454 455
455 456 def __nonzero__(self):
456 457 # nonzero is covered by the __len__ function, but implementing it here
457 458 # makes it easier for extensions to override.
458 459 return len(self._lm) != 0
459 460
460 461 __bool__ = __nonzero__
461 462
462 463 def __setitem__(self, key, node):
463 464 self._lm[key] = node, self.flags(key, b'')
464 465
465 466 def __contains__(self, key):
466 467 if key is None:
467 468 return False
468 469 return key in self._lm
469 470
470 471 def __delitem__(self, key):
471 472 del self._lm[key]
472 473
473 474 def __iter__(self):
474 475 return self._lm.__iter__()
475 476
476 477 def iterkeys(self):
477 478 return self._lm.iterkeys()
478 479
479 480 def keys(self):
480 481 return list(self.iterkeys())
481 482
482 483 def filesnotin(self, m2, match=None):
483 484 '''Set of files in this manifest that are not in the other'''
484 485 if match:
485 486 m1 = self.matches(match)
486 487 m2 = m2.matches(match)
487 488 return m1.filesnotin(m2)
488 489 diff = self.diff(m2)
489 490 files = set(
490 491 filepath
491 492 for filepath, hashflags in pycompat.iteritems(diff)
492 493 if hashflags[1][0] is None
493 494 )
494 495 return files
495 496
496 497 @propertycache
497 498 def _dirs(self):
498 499 return pathutil.dirs(self)
499 500
500 501 def dirs(self):
501 502 return self._dirs
502 503
503 504 def hasdir(self, dir):
504 505 return dir in self._dirs
505 506
506 507 def _filesfastpath(self, match):
507 508 '''Checks whether we can correctly and quickly iterate over matcher
508 509 files instead of over manifest files.'''
509 510 files = match.files()
510 511 return len(files) < 100 and (
511 512 match.isexact()
512 513 or (match.prefix() and all(fn in self for fn in files))
513 514 )
514 515
515 516 def walk(self, match):
516 517 '''Generates matching file names.
517 518
518 519 Equivalent to manifest.matches(match).iterkeys(), but without creating
519 520 an entirely new manifest.
520 521
521 522 It also reports nonexistent files by marking them bad with match.bad().
522 523 '''
523 524 if match.always():
524 525 for f in iter(self):
525 526 yield f
526 527 return
527 528
528 529 fset = set(match.files())
529 530
530 531 # avoid the entire walk if we're only looking for specific files
531 532 if self._filesfastpath(match):
532 533 for fn in sorted(fset):
533 534 yield fn
534 535 return
535 536
536 537 for fn in self:
537 538 if fn in fset:
538 539 # specified pattern is the exact name
539 540 fset.remove(fn)
540 541 if match(fn):
541 542 yield fn
542 543
543 544 # for dirstate.walk, files=[''] means "walk the whole tree".
544 545 # follow that here, too
545 546 fset.discard(b'')
546 547
547 548 for fn in sorted(fset):
548 549 if not self.hasdir(fn):
549 550 match.bad(fn, None)
550 551
551 552 def matches(self, match):
552 553 '''generate a new manifest filtered by the match argument'''
553 554 if match.always():
554 555 return self.copy()
555 556
556 557 if self._filesfastpath(match):
557 558 m = manifestdict()
558 559 lm = self._lm
559 560 for fn in match.files():
560 561 if fn in lm:
561 562 m._lm[fn] = lm[fn]
562 563 return m
563 564
564 565 m = manifestdict()
565 566 m._lm = self._lm.filtercopy(match)
566 567 return m
567 568
568 569 def diff(self, m2, match=None, clean=False):
569 570 '''Finds changes between the current manifest and m2.
570 571
571 572 Args:
572 573 m2: the manifest to which this manifest should be compared.
573 574 clean: if true, include files unchanged between these manifests
574 575 with a None value in the returned dictionary.
575 576
576 577 The result is returned as a dict with filename as key and
577 578 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
578 579 nodeid in the current/other manifest and fl1/fl2 is the flag
579 580 in the current/other manifest. Where the file does not exist,
580 581 the nodeid will be None and the flags will be the empty
581 582 string.
582 583 '''
583 584 if match:
584 585 m1 = self.matches(match)
585 586 m2 = m2.matches(match)
586 587 return m1.diff(m2, clean=clean)
587 588 return self._lm.diff(m2._lm, clean)
588 589
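A sketch of the result shape, using two hypothetical single-entry manifests:

    m1 = manifestdict(b'a\x00' + b'1' * 40 + b'\n')
    m2 = manifestdict(b'a\x00' + b'2' * 40 + b'\n')
    d = m1.diff(m2)
    # d == {b'a': ((bin(b'1' * 40), b''), (bin(b'2' * 40), b''))}
    # a file present only in m1 would map to ((n1, fl1), (None, b''))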
589 590 def setflag(self, key, flag):
590 591 self._lm[key] = self[key], flag
591 592
592 593 def get(self, key, default=None):
593 594 try:
594 595 return self._lm[key][0]
595 596 except KeyError:
596 597 return default
597 598
598 599 def flags(self, key, default=b''):
599 600 try:
600 601 return self._lm[key][1]
601 602 except KeyError:
602 603 return default
603 604
604 605 def copy(self):
605 606 c = manifestdict()
606 607 c._lm = self._lm.copy()
607 608 return c
608 609
609 610 def items(self):
610 611 return (x[:2] for x in self._lm.iterentries())
611 612
612 613 def iteritems(self):
613 614 return (x[:2] for x in self._lm.iterentries())
614 615
615 616 def iterentries(self):
616 617 return self._lm.iterentries()
617 618
618 619 def text(self):
619 620 # most likely uses native version
620 621 return self._lm.text()
621 622
622 623 def fastdelta(self, base, changes):
623 624 """Given a base manifest text as a bytearray and a list of changes
624 625 relative to that text, compute a delta that can be used by revlog.
625 626 """
626 627 delta = []
627 628 dstart = None
628 629 dend = None
629 630 dline = [b""]
630 631 start = 0
631 632 # zero copy representation of base as a buffer
632 633 addbuf = util.buffer(base)
633 634
634 635 changes = list(changes)
635 636 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
636 637 # start with a readonly loop that finds the offset of
637 638 # each line and creates the deltas
638 639 for f, todelete in changes:
639 640 # bs will either be the index of the item or the insert point
640 641 start, end = _msearch(addbuf, f, start)
641 642 if not todelete:
642 643 h, fl = self._lm[f]
643 644 l = b"%s\0%s%s\n" % (f, hex(h), fl)
644 645 else:
645 646 if start == end:
646 647 # item we want to delete was not found, error out
647 648 raise AssertionError(
648 649 _(b"failed to remove %s from manifest") % f
649 650 )
650 651 l = b""
651 652 if dstart is not None and dstart <= start and dend >= start:
652 653 if dend < end:
653 654 dend = end
654 655 if l:
655 656 dline.append(l)
656 657 else:
657 658 if dstart is not None:
658 659 delta.append([dstart, dend, b"".join(dline)])
659 660 dstart = start
660 661 dend = end
661 662 dline = [l]
662 663
663 664 if dstart is not None:
664 665 delta.append([dstart, dend, b"".join(dline)])
665 666 # apply the delta to the base, and get a delta for addrevision
666 667 deltatext, arraytext = _addlistdelta(base, delta)
667 668 else:
668 669 # For large changes, it's much cheaper to just build the text and
669 670 # diff it.
670 671 arraytext = bytearray(self.text())
671 672 deltatext = mdiff.textdiff(
672 673 util.buffer(base), util.buffer(arraytext)
673 674 )
674 675
675 676 return arraytext, deltatext
676 677
677 678
678 679 def _msearch(m, s, lo=0, hi=None):
679 680 '''return a tuple (start, end) that says where to find s within m.
680 681
681 682 If the string is found, m[start:end] is the line containing
682 683 that string. If start == end the string was not found and
683 684 they indicate the proper sorted insertion point.
684 685
685 686 m should be a buffer, a memoryview or a byte string.
686 687 s is a byte string'''
687 688
688 689 def advance(i, c):
689 690 while i < lenm and m[i : i + 1] != c:
690 691 i += 1
691 692 return i
692 693
693 694 if not s:
694 695 return (lo, lo)
695 696 lenm = len(m)
696 697 if not hi:
697 698 hi = lenm
698 699 while lo < hi:
699 700 mid = (lo + hi) // 2
700 701 start = mid
701 702 while start > 0 and m[start - 1 : start] != b'\n':
702 703 start -= 1
703 704 end = advance(start, b'\0')
704 705 if bytes(m[start:end]) < s:
705 706 # we know that after the null there are 40 bytes of sha1
706 707 # this translates to the bisect lo = mid + 1
707 708 lo = advance(end + 40, b'\n') + 1
708 709 else:
709 710 # this translates to the bisect hi = mid
710 711 hi = start
711 712 end = advance(lo, b'\0')
712 713 found = m[lo:end]
713 714 if s == found:
714 715 # we know that after the null there are 40 bytes of sha1
715 716 end = advance(end + 40, b'\n')
716 717 return (lo, end + 1)
717 718 else:
718 719 return (lo, lo)
719 720
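For instance, with two hypothetical 43-byte entries:

    m = b'a\x00' + b'0' * 40 + b'\n' + b'c\x00' + b'1' * 40 + b'\n'
    _msearch(m, b'a')   # (0, 43): the line containing b'a'
    _msearch(m, b'b')   # (43, 43): not found; sorted insertion point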
720 721
721 722 def _checkforbidden(l):
722 723 """Check filenames for illegal characters."""
723 724 for f in l:
724 725 if b'\n' in f or b'\r' in f:
725 726 raise error.StorageError(
726 727 _(b"'\\n' and '\\r' disallowed in filenames: %r")
727 728 % pycompat.bytestr(f)
728 729 )
729 730
730 731
731 732 # apply the changes collected during the bisect loop to our addlist
732 733 # return a delta suitable for addrevision
733 734 def _addlistdelta(addlist, x):
734 735 # for large addlist arrays, building a new array is cheaper
735 736 # than repeatedly modifying the existing one
736 737 currentposition = 0
737 738 newaddlist = bytearray()
738 739
739 740 for start, end, content in x:
740 741 newaddlist += addlist[currentposition:start]
741 742 if content:
742 743 newaddlist += bytearray(content)
743 744
744 745 currentposition = end
745 746
746 747 newaddlist += addlist[currentposition:]
747 748
748 749 deltatext = b"".join(
749 750 struct.pack(b">lll", start, end, len(content)) + content
750 751 for start, end, content in x
751 752 )
752 753 return deltatext, newaddlist
753 754
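A quick illustration of the delta format built above (made-up offsets;
`struct` is already imported at the top of this module):

    base = bytearray(b'0123456789')
    deltatext, newtext = _addlistdelta(base, [[5, 9, b'xyz']])
    assert newtext == bytearray(b'01234xyz9')
    assert deltatext == struct.pack(b'>lll', 5, 9, 3) + b'xyz'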
754 755
755 756 def _splittopdir(f):
756 757 if b'/' in f:
757 758 dir, subpath = f.split(b'/', 1)
758 759 return dir + b'/', subpath
759 760 else:
760 761 return b'', f
761 762
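For example:

    _splittopdir(b'dir/sub/f.txt')   # (b'dir/', b'sub/f.txt')
    _splittopdir(b'f.txt')           # (b'', b'f.txt')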
762 763
763 764 _noop = lambda s: None
764 765
765 766
766 767 class treemanifest(object):
767 768 def __init__(self, dir=b'', text=b''):
768 769 self._dir = dir
769 770 self._node = nullid
770 771 self._loadfunc = _noop
771 772 self._copyfunc = _noop
772 773 self._dirty = False
773 774 self._dirs = {}
774 775 self._lazydirs = {}
775 776 # Using _lazymanifest here is a little slower than plain old dicts
776 777 self._files = {}
777 778 self._flags = {}
778 779 if text:
779 780
780 781 def readsubtree(subdir, subm):
781 782 raise AssertionError(
782 783 b'treemanifest constructor only accepts flat manifests'
783 784 )
784 785
785 786 self.parse(text, readsubtree)
786 787 self._dirty = True # Mark flat manifest dirty after parsing
787 788
788 789 def _subpath(self, path):
789 790 return self._dir + path
790 791
791 792 def _loadalllazy(self):
792 793 selfdirs = self._dirs
793 794 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
794 795 self._lazydirs
795 796 ):
796 797 if docopy:
797 798 selfdirs[d] = readsubtree(path, node).copy()
798 799 else:
799 800 selfdirs[d] = readsubtree(path, node)
800 801 self._lazydirs = {}
801 802
802 803 def _loadlazy(self, d):
803 804 v = self._lazydirs.get(d)
804 805 if v:
805 806 path, node, readsubtree, docopy = v
806 807 if docopy:
807 808 self._dirs[d] = readsubtree(path, node).copy()
808 809 else:
809 810 self._dirs[d] = readsubtree(path, node)
810 811 del self._lazydirs[d]
811 812
812 813 def _loadchildrensetlazy(self, visit):
813 814 if not visit:
814 815 return None
815 816 if visit == b'all' or visit == b'this':
816 817 self._loadalllazy()
817 818 return None
818 819
819 820 loadlazy = self._loadlazy
820 821 for k in visit:
821 822 loadlazy(k + b'/')
822 823 return visit
823 824
824 825 def _loaddifflazy(self, t1, t2):
825 826 """load items in t1 and t2 if they're needed for diffing.
826 827
827 828 The criteria currently are:
828 829 - if it's not present in _lazydirs in either t1 or t2, load it in the
829 830 other (it may already be loaded or it may not exist, doesn't matter)
830 831 - if it's present in _lazydirs in both, compare the nodeid; if it
831 832 differs, load it in both
832 833 """
833 834 toloadlazy = []
834 835 for d, v1 in pycompat.iteritems(t1._lazydirs):
835 836 v2 = t2._lazydirs.get(d)
836 837 if not v2 or v2[1] != v1[1]:
837 838 toloadlazy.append(d)
838 839 for d, v1 in pycompat.iteritems(t2._lazydirs):
839 840 if d not in t1._lazydirs:
840 841 toloadlazy.append(d)
841 842
842 843 for d in toloadlazy:
843 844 t1._loadlazy(d)
844 845 t2._loadlazy(d)
845 846
846 847 def __len__(self):
847 848 self._load()
848 849 size = len(self._files)
849 850 self._loadalllazy()
850 851 for m in self._dirs.values():
851 852 size += m.__len__()
852 853 return size
853 854
854 855 def __nonzero__(self):
855 856 # Faster than "__len__() != 0" since it avoids loading sub-manifests
856 857 return not self._isempty()
857 858
858 859 __bool__ = __nonzero__
859 860
860 861 def _isempty(self):
861 862 self._load() # for consistency; already loaded by all callers
862 863 # See if we can skip loading everything.
863 864 if self._files or (
864 865 self._dirs and any(not m._isempty() for m in self._dirs.values())
865 866 ):
866 867 return False
867 868 self._loadalllazy()
868 869 return not self._dirs or all(m._isempty() for m in self._dirs.values())
869 870
871 @encoding.strmethod
870 872 def __repr__(self):
871 873 return (
872 b'<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>'
874 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
873 875 % (
874 876 self._dir,
875 877 hex(self._node),
876 878 bool(self._loadfunc is _noop),
877 879 self._dirty,
878 880 id(self),
879 881 )
880 882 )
881 883
882 884 def dir(self):
883 885 '''The directory that this tree manifest represents, including a
884 886 trailing '/'. Empty string for the repo root directory.'''
885 887 return self._dir
886 888
887 889 def node(self):
888 890 '''The node of this instance. nullid for unsaved instances. Should
889 891 be updated when the instance is read or written from a revlog.
890 892 '''
891 893 assert not self._dirty
892 894 return self._node
893 895
894 896 def setnode(self, node):
895 897 self._node = node
896 898 self._dirty = False
897 899
898 900 def iterentries(self):
899 901 self._load()
900 902 self._loadalllazy()
901 903 for p, n in sorted(
902 904 itertools.chain(self._dirs.items(), self._files.items())
903 905 ):
904 906 if p in self._files:
905 907 yield self._subpath(p), n, self._flags.get(p, b'')
906 908 else:
907 909 for x in n.iterentries():
908 910 yield x
909 911
910 912 def items(self):
911 913 self._load()
912 914 self._loadalllazy()
913 915 for p, n in sorted(
914 916 itertools.chain(self._dirs.items(), self._files.items())
915 917 ):
916 918 if p in self._files:
917 919 yield self._subpath(p), n
918 920 else:
919 921 for f, sn in pycompat.iteritems(n):
920 922 yield f, sn
921 923
922 924 iteritems = items
923 925
924 926 def iterkeys(self):
925 927 self._load()
926 928 self._loadalllazy()
927 929 for p in sorted(itertools.chain(self._dirs, self._files)):
928 930 if p in self._files:
929 931 yield self._subpath(p)
930 932 else:
931 933 for f in self._dirs[p]:
932 934 yield f
933 935
934 936 def keys(self):
935 937 return list(self.iterkeys())
936 938
937 939 def __iter__(self):
938 940 return self.iterkeys()
939 941
940 942 def __contains__(self, f):
941 943 if f is None:
942 944 return False
943 945 self._load()
944 946 dir, subpath = _splittopdir(f)
945 947 if dir:
946 948 self._loadlazy(dir)
947 949
948 950 if dir not in self._dirs:
949 951 return False
950 952
951 953 return self._dirs[dir].__contains__(subpath)
952 954 else:
953 955 return f in self._files
954 956
955 957 def get(self, f, default=None):
956 958 self._load()
957 959 dir, subpath = _splittopdir(f)
958 960 if dir:
959 961 self._loadlazy(dir)
960 962
961 963 if dir not in self._dirs:
962 964 return default
963 965 return self._dirs[dir].get(subpath, default)
964 966 else:
965 967 return self._files.get(f, default)
966 968
967 969 def __getitem__(self, f):
968 970 self._load()
969 971 dir, subpath = _splittopdir(f)
970 972 if dir:
971 973 self._loadlazy(dir)
972 974
973 975 return self._dirs[dir].__getitem__(subpath)
974 976 else:
975 977 return self._files[f]
976 978
977 979 def flags(self, f):
978 980 self._load()
979 981 dir, subpath = _splittopdir(f)
980 982 if dir:
981 983 self._loadlazy(dir)
982 984
983 985 if dir not in self._dirs:
984 986 return b''
985 987 return self._dirs[dir].flags(subpath)
986 988 else:
987 989 if f in self._lazydirs or f in self._dirs:
988 990 return b''
989 991 return self._flags.get(f, b'')
990 992
991 993 def find(self, f):
992 994 self._load()
993 995 dir, subpath = _splittopdir(f)
994 996 if dir:
995 997 self._loadlazy(dir)
996 998
997 999 return self._dirs[dir].find(subpath)
998 1000 else:
999 1001 return self._files[f], self._flags.get(f, b'')
1000 1002
1001 1003 def __delitem__(self, f):
1002 1004 self._load()
1003 1005 dir, subpath = _splittopdir(f)
1004 1006 if dir:
1005 1007 self._loadlazy(dir)
1006 1008
1007 1009 self._dirs[dir].__delitem__(subpath)
1008 1010 # If the directory is now empty, remove it
1009 1011 if self._dirs[dir]._isempty():
1010 1012 del self._dirs[dir]
1011 1013 else:
1012 1014 del self._files[f]
1013 1015 if f in self._flags:
1014 1016 del self._flags[f]
1015 1017 self._dirty = True
1016 1018
1017 1019 def __setitem__(self, f, n):
1018 1020 assert n is not None
1019 1021 self._load()
1020 1022 dir, subpath = _splittopdir(f)
1021 1023 if dir:
1022 1024 self._loadlazy(dir)
1023 1025 if dir not in self._dirs:
1024 1026 self._dirs[dir] = treemanifest(self._subpath(dir))
1025 1027 self._dirs[dir].__setitem__(subpath, n)
1026 1028 else:
1027 1029 self._files[f] = n[:21] # to match manifestdict's behavior
1028 1030 self._dirty = True
1029 1031
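A small usage sketch (hypothetical path and node): assigning through a
slashed path transparently creates the intermediate submanifest.

    tm = treemanifest()
    tm[b'dir/f.txt'] = b'\x00' * 20   # creates the b'dir/' subtree on demand
    assert tm.hasdir(b'dir')
    assert tm[b'dir/f.txt'] == b'\x00' * 20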
1030 1032 def _load(self):
1031 1033 if self._loadfunc is not _noop:
1032 1034 lf, self._loadfunc = self._loadfunc, _noop
1033 1035 lf(self)
1034 1036 elif self._copyfunc is not _noop:
1035 1037 cf, self._copyfunc = self._copyfunc, _noop
1036 1038 cf(self)
1037 1039
1038 1040 def setflag(self, f, flags):
1039 1041 """Set the flags (symlink, executable) for path f."""
1040 1042 self._load()
1041 1043 dir, subpath = _splittopdir(f)
1042 1044 if dir:
1043 1045 self._loadlazy(dir)
1044 1046 if dir not in self._dirs:
1045 1047 self._dirs[dir] = treemanifest(self._subpath(dir))
1046 1048 self._dirs[dir].setflag(subpath, flags)
1047 1049 else:
1048 1050 self._flags[f] = flags
1049 1051 self._dirty = True
1050 1052
1051 1053 def copy(self):
1052 1054 copy = treemanifest(self._dir)
1053 1055 copy._node = self._node
1054 1056 copy._dirty = self._dirty
1055 1057 if self._copyfunc is _noop:
1056 1058
1057 1059 def _copyfunc(s):
1058 1060 self._load()
1059 1061 s._lazydirs = {
1060 1062 d: (p, n, r, True)
1061 1063 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1062 1064 }
1063 1065 sdirs = s._dirs
1064 1066 for d, v in pycompat.iteritems(self._dirs):
1065 1067 sdirs[d] = v.copy()
1066 1068 s._files = dict.copy(self._files)
1067 1069 s._flags = dict.copy(self._flags)
1068 1070
1069 1071 if self._loadfunc is _noop:
1070 1072 _copyfunc(copy)
1071 1073 else:
1072 1074 copy._copyfunc = _copyfunc
1073 1075 else:
1074 1076 copy._copyfunc = self._copyfunc
1075 1077 return copy
1076 1078
1077 1079 def filesnotin(self, m2, match=None):
1078 1080 '''Set of files in this manifest that are not in the other'''
1079 1081 if match and not match.always():
1080 1082 m1 = self.matches(match)
1081 1083 m2 = m2.matches(match)
1082 1084 return m1.filesnotin(m2)
1083 1085
1084 1086 files = set()
1085 1087
1086 1088 def _filesnotin(t1, t2):
1087 1089 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1088 1090 return
1089 1091 t1._load()
1090 1092 t2._load()
1091 1093 self._loaddifflazy(t1, t2)
1092 1094 for d, m1 in pycompat.iteritems(t1._dirs):
1093 1095 if d in t2._dirs:
1094 1096 m2 = t2._dirs[d]
1095 1097 _filesnotin(m1, m2)
1096 1098 else:
1097 1099 files.update(m1.iterkeys())
1098 1100
1099 1101 for fn in t1._files:
1100 1102 if fn not in t2._files:
1101 1103 files.add(t1._subpath(fn))
1102 1104
1103 1105 _filesnotin(self, m2)
1104 1106 return files
1105 1107
1106 1108 @propertycache
1107 1109 def _alldirs(self):
1108 1110 return pathutil.dirs(self)
1109 1111
1110 1112 def dirs(self):
1111 1113 return self._alldirs
1112 1114
1113 1115 def hasdir(self, dir):
1114 1116 self._load()
1115 1117 topdir, subdir = _splittopdir(dir)
1116 1118 if topdir:
1117 1119 self._loadlazy(topdir)
1118 1120 if topdir in self._dirs:
1119 1121 return self._dirs[topdir].hasdir(subdir)
1120 1122 return False
1121 1123 dirslash = dir + b'/'
1122 1124 return dirslash in self._dirs or dirslash in self._lazydirs
1123 1125
1124 1126 def walk(self, match):
1125 1127 '''Generates matching file names.
1126 1128
1127 1129 Equivalent to manifest.matches(match).iterkeys(), but without creating
1128 1130 an entirely new manifest.
1129 1131
1130 1132 It also reports nonexistent files by marking them bad with match.bad().
1131 1133 '''
1132 1134 if match.always():
1133 1135 for f in iter(self):
1134 1136 yield f
1135 1137 return
1136 1138
1137 1139 fset = set(match.files())
1138 1140
1139 1141 for fn in self._walk(match):
1140 1142 if fn in fset:
1141 1143 # specified pattern is the exact name
1142 1144 fset.remove(fn)
1143 1145 yield fn
1144 1146
1145 1147 # for dirstate.walk, files=[''] means "walk the whole tree".
1146 1148 # follow that here, too
1147 1149 fset.discard(b'')
1148 1150
1149 1151 for fn in sorted(fset):
1150 1152 if not self.hasdir(fn):
1151 1153 match.bad(fn, None)
1152 1154
1153 1155 def _walk(self, match):
1154 1156 '''Recursively generates matching file names for walk().'''
1155 1157 visit = match.visitchildrenset(self._dir[:-1])
1156 1158 if not visit:
1157 1159 return
1158 1160
1159 1161 # yield this dir's files and walk its submanifests
1160 1162 self._load()
1161 1163 visit = self._loadchildrensetlazy(visit)
1162 1164 for p in sorted(list(self._dirs) + list(self._files)):
1163 1165 if p in self._files:
1164 1166 fullp = self._subpath(p)
1165 1167 if match(fullp):
1166 1168 yield fullp
1167 1169 else:
1168 1170 if not visit or p[:-1] in visit:
1169 1171 for f in self._dirs[p]._walk(match):
1170 1172 yield f
1171 1173
1172 1174 def matches(self, match):
1173 1175 '''generate a new manifest filtered by the match argument'''
1174 1176 if match.always():
1175 1177 return self.copy()
1176 1178
1177 1179 return self._matches(match)
1178 1180
1179 1181 def _matches(self, match):
1180 1182 '''recursively generate a new manifest filtered by the match argument.
1181 1183 '''
1182 1184
1183 1185 visit = match.visitchildrenset(self._dir[:-1])
1184 1186 if visit == b'all':
1185 1187 return self.copy()
1186 1188 ret = treemanifest(self._dir)
1187 1189 if not visit:
1188 1190 return ret
1189 1191
1190 1192 self._load()
1191 1193 for fn in self._files:
1192 1194 # While visitchildrenset *usually* lists only subdirs, this is
1193 1195 # actually up to the matcher and may have some files in the set().
1194 1196 # If visit == 'this', we should obviously look at the files in this
1195 1197 # directory; if visit is a set, and fn is in it, we should inspect
1196 1198 # fn (but no need to inspect things not in the set).
1197 1199 if visit != b'this' and fn not in visit:
1198 1200 continue
1199 1201 fullp = self._subpath(fn)
1200 1202 # visitchildrenset isn't perfect, we still need to call the regular
1201 1203 # matcher code to further filter results.
1202 1204 if not match(fullp):
1203 1205 continue
1204 1206 ret._files[fn] = self._files[fn]
1205 1207 if fn in self._flags:
1206 1208 ret._flags[fn] = self._flags[fn]
1207 1209
1208 1210 visit = self._loadchildrensetlazy(visit)
1209 1211 for dir, subm in pycompat.iteritems(self._dirs):
1210 1212 if visit and dir[:-1] not in visit:
1211 1213 continue
1212 1214 m = subm._matches(match)
1213 1215 if not m._isempty():
1214 1216 ret._dirs[dir] = m
1215 1217
1216 1218 if not ret._isempty():
1217 1219 ret._dirty = True
1218 1220 return ret
1219 1221
1220 1222 def diff(self, m2, match=None, clean=False):
1221 1223 '''Finds changes between the current manifest and m2.
1222 1224
1223 1225 Args:
1224 1226 m2: the manifest to which this manifest should be compared.
1225 1227 clean: if true, include files unchanged between these manifests
1226 1228 with a None value in the returned dictionary.
1227 1229
1228 1230 The result is returned as a dict with filename as key and
1229 1231 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1230 1232 nodeid in the current/other manifest and fl1/fl2 is the flag
1231 1233 in the current/other manifest. Where the file does not exist,
1232 1234 the nodeid will be None and the flags will be the empty
1233 1235 string.
1234 1236 '''
1235 1237 if match and not match.always():
1236 1238 m1 = self.matches(match)
1237 1239 m2 = m2.matches(match)
1238 1240 return m1.diff(m2, clean=clean)
1239 1241 result = {}
1240 1242 emptytree = treemanifest()
1241 1243
1242 1244 def _iterativediff(t1, t2, stack):
1243 1245 """compares two tree manifests and append new tree-manifests which
1244 1246 needs to be compared to stack"""
1245 1247 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1246 1248 return
1247 1249 t1._load()
1248 1250 t2._load()
1249 1251 self._loaddifflazy(t1, t2)
1250 1252
1251 1253 for d, m1 in pycompat.iteritems(t1._dirs):
1252 1254 m2 = t2._dirs.get(d, emptytree)
1253 1255 stack.append((m1, m2))
1254 1256
1255 1257 for d, m2 in pycompat.iteritems(t2._dirs):
1256 1258 if d not in t1._dirs:
1257 1259 stack.append((emptytree, m2))
1258 1260
1259 1261 for fn, n1 in pycompat.iteritems(t1._files):
1260 1262 fl1 = t1._flags.get(fn, b'')
1261 1263 n2 = t2._files.get(fn, None)
1262 1264 fl2 = t2._flags.get(fn, b'')
1263 1265 if n1 != n2 or fl1 != fl2:
1264 1266 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1265 1267 elif clean:
1266 1268 result[t1._subpath(fn)] = None
1267 1269
1268 1270 for fn, n2 in pycompat.iteritems(t2._files):
1269 1271 if fn not in t1._files:
1270 1272 fl2 = t2._flags.get(fn, b'')
1271 1273 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1272 1274
1273 1275 stackls = []
1274 1276 _iterativediff(self, m2, stackls)
1275 1277 while stackls:
1276 1278 t1, t2 = stackls.pop()
1277 1279 # stackls is populated in the function call
1278 1280 _iterativediff(t1, t2, stackls)
1279 1281 return result
1280 1282
1281 1283 def unmodifiedsince(self, m2):
1282 1284 return not self._dirty and not m2._dirty and self._node == m2._node
1283 1285
1284 1286 def parse(self, text, readsubtree):
1285 1287 selflazy = self._lazydirs
1286 1288 subpath = self._subpath
1287 1289 for f, n, fl in _parse(text):
1288 1290 if fl == b't':
1289 1291 f = f + b'/'
1290 1292 # False below means "doesn't need to be copied" and can use the
1291 1293 # cached value from readsubtree directly.
1292 1294 selflazy[f] = (subpath(f), n, readsubtree, False)
1293 1295 elif b'/' in f:
1294 1296 # This is a flat manifest, so use __setitem__ and setflag rather
1295 1297 # than assigning directly to _files and _flags, so we can
1296 1298 # assign a path in a subdirectory, and to mark dirty (compared
1297 1299 # to nullid).
1298 1300 self[f] = n
1299 1301 if fl:
1300 1302 self.setflag(f, fl)
1301 1303 else:
1302 1304 # Assigning to _files and _flags avoids marking as dirty,
1303 1305 # and should be a little faster.
1304 1306 self._files[f] = n
1305 1307 if fl:
1306 1308 self._flags[f] = fl
1307 1309
1308 1310 def text(self):
1309 1311 """Get the full data of this manifest as a bytestring."""
1310 1312 self._load()
1311 1313 return _text(self.iterentries())
1312 1314
1313 1315 def dirtext(self):
1314 1316 """Get the full data of this directory as a bytestring. Make sure that
1315 1317 any submanifests have been written first, so their nodeids are correct.
1316 1318 """
1317 1319 self._load()
1318 1320 flags = self.flags
1319 1321 lazydirs = [
1320 1322 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1321 1323 ]
1322 1324 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1323 1325 files = [(f, self._files[f], flags(f)) for f in self._files]
1324 1326 return _text(sorted(dirs + files + lazydirs))
1325 1327
1326 1328 def read(self, gettext, readsubtree):
1327 1329 def _load_for_read(s):
1328 1330 s.parse(gettext(), readsubtree)
1329 1331 s._dirty = False
1330 1332
1331 1333 self._loadfunc = _load_for_read
1332 1334
1333 1335 def writesubtrees(self, m1, m2, writesubtree, match):
1334 1336 self._load() # for consistency; should never have any effect here
1335 1337 m1._load()
1336 1338 m2._load()
1337 1339 emptytree = treemanifest()
1338 1340
1339 1341 def getnode(m, d):
1340 1342 ld = m._lazydirs.get(d)
1341 1343 if ld:
1342 1344 return ld[1]
1343 1345 return m._dirs.get(d, emptytree)._node
1344 1346
1345 1347 # let's skip investigating things that `match` says we do not need.
1346 1348 visit = match.visitchildrenset(self._dir[:-1])
1347 1349 visit = self._loadchildrensetlazy(visit)
1348 1350 if visit == b'this' or visit == b'all':
1349 1351 visit = None
1350 1352 for d, subm in pycompat.iteritems(self._dirs):
1351 1353 if visit and d[:-1] not in visit:
1352 1354 continue
1353 1355 subp1 = getnode(m1, d)
1354 1356 subp2 = getnode(m2, d)
1355 1357 if subp1 == nullid:
1356 1358 subp1, subp2 = subp2, subp1
1357 1359 writesubtree(subm, subp1, subp2, match)
1358 1360
1359 1361 def walksubtrees(self, matcher=None):
1360 1362 """Returns an iterator of the subtrees of this manifest, including this
1361 1363 manifest itself.
1362 1364
1363 1365 If `matcher` is provided, it only returns subtrees that match.
1364 1366 """
1365 1367 if matcher and not matcher.visitdir(self._dir[:-1]):
1366 1368 return
1367 1369 if not matcher or matcher(self._dir[:-1]):
1368 1370 yield self
1369 1371
1370 1372 self._load()
1371 1373 # OPT: use visitchildrenset to avoid loading everything.
1372 1374 self._loadalllazy()
1373 1375 for d, subm in pycompat.iteritems(self._dirs):
1374 1376 for subtree in subm.walksubtrees(matcher=matcher):
1375 1377 yield subtree
1376 1378
1377 1379
1378 1380 class manifestfulltextcache(util.lrucachedict):
1379 1381 """File-backed LRU cache for the manifest cache
1380 1382
1381 1383 File consists of entries, up to EOF:
1382 1384
1383 1385 - 20 bytes node, 4 bytes length, <length> manifest data
1384 1386
1385 1387 These are written in reverse cache order (oldest to newest).
1386 1388
1387 1389 """
1388 1390
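    # Layout sketch of one on-disk record, per the docstring above (not
    # code used by this class): node, big-endian length, then the data.
    #
    #   record = node + struct.pack(b'>L', len(text)) + text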
1389 1391 _file = b'manifestfulltextcache'
1390 1392
1391 1393 def __init__(self, max):
1392 1394 super(manifestfulltextcache, self).__init__(max)
1393 1395 self._dirty = False
1394 1396 self._read = False
1395 1397 self._opener = None
1396 1398
1397 1399 def read(self):
1398 1400 if self._read or self._opener is None:
1399 1401 return
1400 1402
1401 1403 try:
1402 1404 with self._opener(self._file) as fp:
1403 1405 set = super(manifestfulltextcache, self).__setitem__
1404 1406 # ignore trailing data; this is a cache, so corruption is just skipped
1405 1407 while True:
1406 1408 node = fp.read(20)
1407 1409 if len(node) < 20:
1408 1410 break
1409 1411 try:
1410 1412 size = struct.unpack(b'>L', fp.read(4))[0]
1411 1413 except struct.error:
1412 1414 break
1413 1415 value = bytearray(fp.read(size))
1414 1416 if len(value) != size:
1415 1417 break
1416 1418 set(node, value)
1417 1419 except IOError:
1418 1420 # the file is allowed to be missing
1419 1421 pass
1420 1422
1421 1423 self._read = True
1422 1424 self._dirty = False
1423 1425
1424 1426 def write(self):
1425 1427 if not self._dirty or self._opener is None:
1426 1428 return
1427 1429 # rotate backwards to the first used node
1428 1430 with self._opener(
1429 1431 self._file, b'w', atomictemp=True, checkambig=True
1430 1432 ) as fp:
1431 1433 node = self._head.prev
1432 1434 while True:
1433 1435 if node.key in self._cache:
1434 1436 fp.write(node.key)
1435 1437 fp.write(struct.pack(b'>L', len(node.value)))
1436 1438 fp.write(node.value)
1437 1439 if node is self._head:
1438 1440 break
1439 1441 node = node.prev
1440 1442
1441 1443 def __len__(self):
1442 1444 if not self._read:
1443 1445 self.read()
1444 1446 return super(manifestfulltextcache, self).__len__()
1445 1447
1446 1448 def __contains__(self, k):
1447 1449 if not self._read:
1448 1450 self.read()
1449 1451 return super(manifestfulltextcache, self).__contains__(k)
1450 1452
1451 1453 def __iter__(self):
1452 1454 if not self._read:
1453 1455 self.read()
1454 1456 return super(manifestfulltextcache, self).__iter__()
1455 1457
1456 1458 def __getitem__(self, k):
1457 1459 if not self._read:
1458 1460 self.read()
1459 1461 # the cache lru order can change on read
1460 1462 setdirty = self._cache.get(k) is not self._head
1461 1463 value = super(manifestfulltextcache, self).__getitem__(k)
1462 1464 if setdirty:
1463 1465 self._dirty = True
1464 1466 return value
1465 1467
1466 1468 def __setitem__(self, k, v):
1467 1469 if not self._read:
1468 1470 self.read()
1469 1471 super(manifestfulltextcache, self).__setitem__(k, v)
1470 1472 self._dirty = True
1471 1473
1472 1474 def __delitem__(self, k):
1473 1475 if not self._read:
1474 1476 self.read()
1475 1477 super(manifestfulltextcache, self).__delitem__(k)
1476 1478 self._dirty = True
1477 1479
1478 1480 def get(self, k, default=None):
1479 1481 if not self._read:
1480 1482 self.read()
1481 1483 return super(manifestfulltextcache, self).get(k, default=default)
1482 1484
1483 1485 def clear(self, clear_persisted_data=False):
1484 1486 super(manifestfulltextcache, self).clear()
1485 1487 if clear_persisted_data:
1486 1488 self._dirty = True
1487 1489 self.write()
1488 1490 self._read = False
1489 1491
1490 1492
1491 1493 # an upper bound of what we expect from compression
1492 1494 # (real live value seems to be "3")
1493 1495 MAXCOMPRESSION = 3
1494 1496
1495 1497
1496 1498 @interfaceutil.implementer(repository.imanifeststorage)
1497 1499 class manifestrevlog(object):
1498 1500 '''A revlog that stores manifest texts. This is responsible for caching the
1499 1501 full-text manifest contents.
1500 1502 '''
1501 1503
1502 1504 def __init__(
1503 1505 self,
1504 1506 opener,
1505 1507 tree=b'',
1506 1508 dirlogcache=None,
1507 1509 indexfile=None,
1508 1510 treemanifest=False,
1509 1511 ):
1510 1512 """Constructs a new manifest revlog
1511 1513
1512 1514 `indexfile` - used by extensions to have two manifests at once, like
1513 1515 when transitioning between flat manifests and treemanifests.
1514 1516
1515 1517 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1516 1518 options can also be used to make this a tree manifest revlog. The opener
1517 1519 option takes precedence, so if it is set to True, we ignore whatever
1518 1520 value is passed in to the constructor.
1519 1521 """
1520 1522 # During normal operations, we expect to deal with not more than four
1521 1523 # revs at a time (such as during commit --amend). When rebasing large
1522 1524 # stacks of commits, the number can go up, hence the config knob below.
1523 1525 cachesize = 4
1524 1526 optiontreemanifest = False
1525 1527 opts = getattr(opener, 'options', None)
1526 1528 if opts is not None:
1527 1529 cachesize = opts.get(b'manifestcachesize', cachesize)
1528 1530 optiontreemanifest = opts.get(b'treemanifest', False)
1529 1531
1530 1532 self._treeondisk = optiontreemanifest or treemanifest
1531 1533
1532 1534 self._fulltextcache = manifestfulltextcache(cachesize)
1533 1535
1534 1536 if tree:
1535 1537 assert self._treeondisk, b'opts is %r' % opts
1536 1538
1537 1539 if indexfile is None:
1538 1540 indexfile = b'00manifest.i'
1539 1541 if tree:
1540 1542 indexfile = b"meta/" + tree + indexfile
1541 1543
1542 1544 self.tree = tree
1543 1545
1544 1546 # The dirlogcache is kept on the root manifest log
1545 1547 if tree:
1546 1548 self._dirlogcache = dirlogcache
1547 1549 else:
1548 1550 self._dirlogcache = {b'': self}
1549 1551
1550 1552 self._revlog = revlog.revlog(
1551 1553 opener,
1552 1554 indexfile,
1553 1555 # only root indexfile is cached
1554 1556 checkambig=not bool(tree),
1555 1557 mmaplargeindex=True,
1556 1558 upperboundcomp=MAXCOMPRESSION,
1557 1559 )
1558 1560
1559 1561 self.index = self._revlog.index
1560 1562 self.version = self._revlog.version
1561 1563 self._generaldelta = self._revlog._generaldelta
1562 1564
1563 1565 def _setupmanifestcachehooks(self, repo):
1564 1566 """Persist the manifestfulltextcache on lock release"""
1565 1567 if not util.safehasattr(repo, b'_wlockref'):
1566 1568 return
1567 1569
1568 1570 self._fulltextcache._opener = repo.wcachevfs
1569 1571 if repo._currentlock(repo._wlockref) is None:
1570 1572 return
1571 1573
1572 1574 reporef = weakref.ref(repo)
1573 1575 manifestrevlogref = weakref.ref(self)
1574 1576
1575 1577 def persistmanifestcache(success):
1576 1578 # Repo is in an unknown state, do not persist.
1577 1579 if not success:
1578 1580 return
1579 1581
1580 1582 repo = reporef()
1581 1583 self = manifestrevlogref()
1582 1584 if repo is None or self is None:
1583 1585 return
1584 1586 if repo.manifestlog.getstorage(b'') is not self:
1585 1587 # there's a different manifest in play now, abort
1586 1588 return
1587 1589 self._fulltextcache.write()
1588 1590
1589 1591 repo._afterlock(persistmanifestcache)
1590 1592
1591 1593 @property
1592 1594 def fulltextcache(self):
1593 1595 return self._fulltextcache
1594 1596
1595 1597 def clearcaches(self, clear_persisted_data=False):
1596 1598 self._revlog.clearcaches()
1597 1599 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1598 1600 self._dirlogcache = {self.tree: self}
1599 1601
1600 1602 def dirlog(self, d):
1601 1603 if d:
1602 1604 assert self._treeondisk
1603 1605 if d not in self._dirlogcache:
1604 1606 mfrevlog = manifestrevlog(
1605 1607 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1606 1608 )
1607 1609 self._dirlogcache[d] = mfrevlog
1608 1610 return self._dirlogcache[d]
1609 1611
1610 1612 def add(
1611 1613 self,
1612 1614 m,
1613 1615 transaction,
1614 1616 link,
1615 1617 p1,
1616 1618 p2,
1617 1619 added,
1618 1620 removed,
1619 1621 readtree=None,
1620 1622 match=None,
1621 1623 ):
1622 1624 if p1 in self.fulltextcache and util.safehasattr(m, b'fastdelta'):
1623 1625 # If our first parent is in the manifest cache, we can
1624 1626 # compute a delta here using properties we know about the
1625 1627 # manifest up-front, which may save time later for the
1626 1628 # revlog layer.
1627 1629
1628 1630 _checkforbidden(added)
1629 1631 # combine the changed lists into one sorted iterator
1630 1632 work = heapq.merge(
1631 1633 [(x, False) for x in sorted(added)],
1632 1634 [(x, True) for x in sorted(removed)],
1633 1635 )
1634 1636
1635 1637 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1636 1638 cachedelta = self._revlog.rev(p1), deltatext
1637 1639 text = util.buffer(arraytext)
1638 1640 n = self._revlog.addrevision(
1639 1641 text, transaction, link, p1, p2, cachedelta
1640 1642 )
1641 1643 else:
1642 1644 # The first parent manifest isn't already loaded, so we'll
1643 1645 # just encode a fulltext of the manifest and pass that
1644 1646 # through to the revlog layer, and let it handle the delta
1645 1647 # process.
1646 1648 if self._treeondisk:
1647 1649 assert readtree, b"readtree must be set for treemanifest writes"
1648 1650 assert match, b"match must be specified for treemanifest writes"
1649 1651 m1 = readtree(self.tree, p1)
1650 1652 m2 = readtree(self.tree, p2)
1651 1653 n = self._addtree(
1652 1654 m, transaction, link, m1, m2, readtree, match=match
1653 1655 )
1654 1656 arraytext = None
1655 1657 else:
1656 1658 text = m.text()
1657 1659 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1658 1660 arraytext = bytearray(text)
1659 1661
1660 1662 if arraytext is not None:
1661 1663 self.fulltextcache[n] = arraytext
1662 1664
1663 1665 return n
1664 1666
1665 1667 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1666 1668 # If the manifest is unchanged compared to one parent,
1667 1669 # don't write a new revision
1668 1670 if self.tree != b'' and (
1669 1671 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1670 1672 ):
1671 1673 return m.node()
1672 1674
1673 1675 def writesubtree(subm, subp1, subp2, match):
1674 1676 sublog = self.dirlog(subm.dir())
1675 1677 sublog.add(
1676 1678 subm,
1677 1679 transaction,
1678 1680 link,
1679 1681 subp1,
1680 1682 subp2,
1681 1683 None,
1682 1684 None,
1683 1685 readtree=readtree,
1684 1686 match=match,
1685 1687 )
1686 1688
1687 1689 m.writesubtrees(m1, m2, writesubtree, match)
1688 1690 text = m.dirtext()
1689 1691 n = None
1690 1692 if self.tree != b'':
1691 1693 # Double-check whether contents are unchanged to one parent
1692 1694 if text == m1.dirtext():
1693 1695 n = m1.node()
1694 1696 elif text == m2.dirtext():
1695 1697 n = m2.node()
1696 1698
1697 1699 if not n:
1698 1700 n = self._revlog.addrevision(
1699 1701 text, transaction, link, m1.node(), m2.node()
1700 1702 )
1701 1703
1702 1704 # Save nodeid so parent manifest can calculate its nodeid
1703 1705 m.setnode(n)
1704 1706 return n
1705 1707
1706 1708 def __len__(self):
1707 1709 return len(self._revlog)
1708 1710
1709 1711 def __iter__(self):
1710 1712 return self._revlog.__iter__()
1711 1713
1712 1714 def rev(self, node):
1713 1715 return self._revlog.rev(node)
1714 1716
1715 1717 def node(self, rev):
1716 1718 return self._revlog.node(rev)
1717 1719
1718 1720 def lookup(self, value):
1719 1721 return self._revlog.lookup(value)
1720 1722
1721 1723 def parentrevs(self, rev):
1722 1724 return self._revlog.parentrevs(rev)
1723 1725
1724 1726 def parents(self, node):
1725 1727 return self._revlog.parents(node)
1726 1728
1727 1729 def linkrev(self, rev):
1728 1730 return self._revlog.linkrev(rev)
1729 1731
1730 1732 def checksize(self):
1731 1733 return self._revlog.checksize()
1732 1734
1733 1735 def revision(self, node, _df=None, raw=False):
1734 1736 return self._revlog.revision(node, _df=_df, raw=raw)
1735 1737
1736 1738 def rawdata(self, node, _df=None):
1737 1739 return self._revlog.rawdata(node, _df=_df)
1738 1740
1739 1741 def revdiff(self, rev1, rev2):
1740 1742 return self._revlog.revdiff(rev1, rev2)
1741 1743
1742 1744 def cmp(self, node, text):
1743 1745 return self._revlog.cmp(node, text)
1744 1746
1745 1747 def deltaparent(self, rev):
1746 1748 return self._revlog.deltaparent(rev)
1747 1749
1748 1750 def emitrevisions(
1749 1751 self,
1750 1752 nodes,
1751 1753 nodesorder=None,
1752 1754 revisiondata=False,
1753 1755 assumehaveparentrevisions=False,
1754 1756 deltamode=repository.CG_DELTAMODE_STD,
1755 1757 ):
1756 1758 return self._revlog.emitrevisions(
1757 1759 nodes,
1758 1760 nodesorder=nodesorder,
1759 1761 revisiondata=revisiondata,
1760 1762 assumehaveparentrevisions=assumehaveparentrevisions,
1761 1763 deltamode=deltamode,
1762 1764 )
1763 1765
1764 1766 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1765 1767 return self._revlog.addgroup(
1766 1768 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1767 1769 )
1768 1770
1769 1771 def rawsize(self, rev):
1770 1772 return self._revlog.rawsize(rev)
1771 1773
1772 1774 def getstrippoint(self, minlink):
1773 1775 return self._revlog.getstrippoint(minlink)
1774 1776
1775 1777 def strip(self, minlink, transaction):
1776 1778 return self._revlog.strip(minlink, transaction)
1777 1779
1778 1780 def files(self):
1779 1781 return self._revlog.files()
1780 1782
1781 1783 def clone(self, tr, destrevlog, **kwargs):
1782 1784 if not isinstance(destrevlog, manifestrevlog):
1783 1785 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1784 1786
1785 1787 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1786 1788
1787 1789 def storageinfo(
1788 1790 self,
1789 1791 exclusivefiles=False,
1790 1792 sharedfiles=False,
1791 1793 revisionscount=False,
1792 1794 trackedsize=False,
1793 1795 storedsize=False,
1794 1796 ):
1795 1797 return self._revlog.storageinfo(
1796 1798 exclusivefiles=exclusivefiles,
1797 1799 sharedfiles=sharedfiles,
1798 1800 revisionscount=revisionscount,
1799 1801 trackedsize=trackedsize,
1800 1802 storedsize=storedsize,
1801 1803 )
1802 1804
1803 1805 @property
1804 1806 def indexfile(self):
1805 1807 return self._revlog.indexfile
1806 1808
1807 1809 @indexfile.setter
1808 1810 def indexfile(self, value):
1809 1811 self._revlog.indexfile = value
1810 1812
1811 1813 @property
1812 1814 def opener(self):
1813 1815 return self._revlog.opener
1814 1816
1815 1817 @opener.setter
1816 1818 def opener(self, value):
1817 1819 self._revlog.opener = value
1818 1820
1819 1821
1820 1822 @interfaceutil.implementer(repository.imanifestlog)
1821 1823 class manifestlog(object):
1822 1824 """A collection class representing the collection of manifest snapshots
1823 1825 referenced by commits in the repository.
1824 1826
1825 1827 In this situation, 'manifest' refers to the abstract concept of a snapshot
1826 1828 of the list of files in the given commit. Consumers of the output of this
1827 1829 class do not care about the implementation details of the actual manifests
1828 1830 they receive (i.e. tree or flat or lazily loaded, etc)."""
1829 1831
1830 1832 def __init__(self, opener, repo, rootstore, narrowmatch):
1831 1833 usetreemanifest = False
1832 1834 cachesize = 4
1833 1835
1834 1836 opts = getattr(opener, 'options', None)
1835 1837 if opts is not None:
1836 1838 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1837 1839 cachesize = opts.get(b'manifestcachesize', cachesize)
1838 1840
1839 1841 self._treemanifests = usetreemanifest
1840 1842
1841 1843 self._rootstore = rootstore
1842 1844 self._rootstore._setupmanifestcachehooks(repo)
1843 1845 self._narrowmatch = narrowmatch
1844 1846
1845 1847 # A cache of the manifestctx or treemanifestctx for each directory
1846 1848 self._dirmancache = {}
1847 1849 self._dirmancache[b''] = util.lrucachedict(cachesize)
1848 1850
1849 1851 self._cachesize = cachesize
1850 1852
1851 1853 def __getitem__(self, node):
1852 1854 """Retrieves the manifest instance for the given node. Throws a
1853 1855 LookupError if not found.
1854 1856 """
1855 1857 return self.get(b'', node)
1856 1858
1857 1859 def get(self, tree, node, verify=True):
1858 1860 """Retrieves the manifest instance for the given node. Throws a
1859 1861 LookupError if not found.
1860 1862
1861 1863 `verify` - if True an exception will be thrown if the node is not in
1862 1864 the revlog
1863 1865 """
1864 1866 if node in self._dirmancache.get(tree, ()):
1865 1867 return self._dirmancache[tree][node]
1866 1868
1867 1869 if not self._narrowmatch.always():
1868 1870 if not self._narrowmatch.visitdir(tree[:-1]):
1869 1871 return excludeddirmanifestctx(tree, node)
1870 1872 if tree:
1871 1873 if self._rootstore._treeondisk:
1872 1874 if verify:
1873 1875 # Side-effect is LookupError is raised if node doesn't
1874 1876 # exist.
1875 1877 self.getstorage(tree).rev(node)
1876 1878
1877 1879 m = treemanifestctx(self, tree, node)
1878 1880 else:
1879 1881 raise error.Abort(
1880 1882 _(
1881 1883 b"cannot ask for manifest directory '%s' in a flat "
1882 1884 b"manifest"
1883 1885 )
1884 1886 % tree
1885 1887 )
1886 1888 else:
1887 1889 if verify:
1888 1890 # Side-effect is LookupError is raised if node doesn't exist.
1889 1891 self._rootstore.rev(node)
1890 1892
1891 1893 if self._treemanifests:
1892 1894 m = treemanifestctx(self, b'', node)
1893 1895 else:
1894 1896 m = manifestctx(self, node)
1895 1897
1896 1898 if node != nullid:
1897 1899 mancache = self._dirmancache.get(tree)
1898 1900 if not mancache:
1899 1901 mancache = util.lrucachedict(self._cachesize)
1900 1902 self._dirmancache[tree] = mancache
1901 1903 mancache[node] = m
1902 1904 return m
1903 1905
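# Illustrative usage sketch (not from the original source; `repo` and a
# changectx `ctx` are assumed to exist):
#
#   ml = repo.manifestlog
#   mctx = ml[ctx.manifestnode()]    # __getitem__ -> get(b'', node)
#   mdict = mctx.read()              # manifestdict mapping path -> node
#   sub = ml.get(b'dir/', subnode)   # per-directory lookup (tree manifests)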
1904 1906 def getstorage(self, tree):
1905 1907 return self._rootstore.dirlog(tree)
1906 1908
1907 1909 def clearcaches(self, clear_persisted_data=False):
1908 1910 self._dirmancache.clear()
1909 1911 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1910 1912
1911 1913 def rev(self, node):
1912 1914 return self._rootstore.rev(node)
1913 1915
1914 1916
1915 1917 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1916 1918 class memmanifestctx(object):
1917 1919 def __init__(self, manifestlog):
1918 1920 self._manifestlog = manifestlog
1919 1921 self._manifestdict = manifestdict()
1920 1922
1921 1923 def _storage(self):
1922 1924 return self._manifestlog.getstorage(b'')
1923 1925
1924 1926 def new(self):
1925 1927 return memmanifestctx(self._manifestlog)
1926 1928
1927 1929 def copy(self):
1928 1930 memmf = memmanifestctx(self._manifestlog)
1929 1931 memmf._manifestdict = self.read().copy()
1930 1932 return memmf
1931 1933
1932 1934 def read(self):
1933 1935 return self._manifestdict
1934 1936
1935 1937 def write(self, transaction, link, p1, p2, added, removed, match=None):
1936 1938 return self._storage().add(
1937 1939 self._manifestdict,
1938 1940 transaction,
1939 1941 link,
1940 1942 p1,
1941 1943 p2,
1942 1944 added,
1943 1945 removed,
1944 1946 match=match,
1945 1947 )
1946 1948
1947 1949
1948 1950 @interfaceutil.implementer(repository.imanifestrevisionstored)
1949 1951 class manifestctx(object):
1950 1952 """A class representing a single revision of a manifest, including its
1951 1953 contents, its parent revs, and its linkrev.
1952 1954 """
1953 1955
1954 1956 def __init__(self, manifestlog, node):
1955 1957 self._manifestlog = manifestlog
1956 1958 self._data = None
1957 1959
1958 1960 self._node = node
1959 1961
1960 1962 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1961 1963 # but let's add it later when something needs it and we can load it
1962 1964 # lazily.
1963 1965 # self.p1, self.p2 = store.parents(node)
1964 1966 # rev = store.rev(node)
1965 1967 # self.linkrev = store.linkrev(rev)
1966 1968
1967 1969 def _storage(self):
1968 1970 return self._manifestlog.getstorage(b'')
1969 1971
1970 1972 def node(self):
1971 1973 return self._node
1972 1974
1973 1975 def new(self):
1974 1976 return memmanifestctx(self._manifestlog)
1975 1977
1976 1978 def copy(self):
1977 1979 memmf = memmanifestctx(self._manifestlog)
1978 1980 memmf._manifestdict = self.read().copy()
1979 1981 return memmf
1980 1982
1981 1983 @propertycache
1982 1984 def parents(self):
1983 1985 return self._storage().parents(self._node)
1984 1986
1985 1987 def read(self):
1986 1988 if self._data is None:
1987 1989 if self._node == nullid:
1988 1990 self._data = manifestdict()
1989 1991 else:
1990 1992 store = self._storage()
1991 1993 if self._node in store.fulltextcache:
1992 1994 text = pycompat.bytestr(store.fulltextcache[self._node])
1993 1995 else:
1994 1996 text = store.revision(self._node)
1995 1997 arraytext = bytearray(text)
1996 1998 store.fulltextcache[self._node] = arraytext
1997 1999 self._data = manifestdict(text)
1998 2000 return self._data
1999 2001
2000 2002 def readfast(self, shallow=False):
2001 2003 '''Calls either readdelta or read, based on which would be less work.
2002 2004 readdelta is called if the delta is against p1, and therefore can be
2003 2005 read quickly.
2004 2006
2005 2007 If `shallow` is True, nothing changes since this is a flat manifest.
2006 2008 '''
2007 2009 store = self._storage()
2008 2010 r = store.rev(self._node)
2009 2011 deltaparent = store.deltaparent(r)
2010 2012 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2011 2013 return self.readdelta()
2012 2014 return self.read()
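# Sketch of the trade-off above (illustrative): when revision r of the
# manifest is stored as a delta against one of its parents,
# store.deltaparent(r) is in store.parentrevs(r) and readdelta() merely
# patches that one delta; otherwise read() reconstructs the full text.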
2013 2015
2014 2016 def readdelta(self, shallow=False):
2015 2017 '''Returns a manifest containing just the entries that are present
2016 2018 in this manifest, but not in its p1 manifest. This is efficient to read
2017 2019 if the revlog delta is already against p1.
2018 2020
2019 2021 Changing the value of `shallow` has no effect on flat manifests.
2020 2022 '''
2021 2023 store = self._storage()
2022 2024 r = store.rev(self._node)
2023 2025 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2024 2026 return manifestdict(d)
2025 2027
2026 2028 def find(self, key):
2027 2029 return self.read().find(key)
2028 2030
2029 2031
2030 2032 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2031 2033 class memtreemanifestctx(object):
2032 2034 def __init__(self, manifestlog, dir=b''):
2033 2035 self._manifestlog = manifestlog
2034 2036 self._dir = dir
2035 2037 self._treemanifest = treemanifest()
2036 2038
2037 2039 def _storage(self):
2038 2040 return self._manifestlog.getstorage(b'')
2039 2041
2040 2042 def new(self, dir=b''):
2041 2043 return memtreemanifestctx(self._manifestlog, dir=dir)
2042 2044
2043 2045 def copy(self):
2044 2046 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2045 2047 memmf._treemanifest = self._treemanifest.copy()
2046 2048 return memmf
2047 2049
2048 2050 def read(self):
2049 2051 return self._treemanifest
2050 2052
2051 2053 def write(self, transaction, link, p1, p2, added, removed, match=None):
2052 2054 def readtree(dir, node):
2053 2055 return self._manifestlog.get(dir, node).read()
2054 2056
2055 2057 return self._storage().add(
2056 2058 self._treemanifest,
2057 2059 transaction,
2058 2060 link,
2059 2061 p1,
2060 2062 p2,
2061 2063 added,
2062 2064 removed,
2063 2065 readtree=readtree,
2064 2066 match=match,
2065 2067 )
2066 2068
2067 2069
2068 2070 @interfaceutil.implementer(repository.imanifestrevisionstored)
2069 2071 class treemanifestctx(object):
2070 2072 def __init__(self, manifestlog, dir, node):
2071 2073 self._manifestlog = manifestlog
2072 2074 self._dir = dir
2073 2075 self._data = None
2074 2076
2075 2077 self._node = node
2076 2078
2077 2079 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2078 2080 # we can instantiate treemanifestctx objects for directories we don't
2079 2081 # have on disk.
2080 2082 # self.p1, self.p2 = store.parents(node)
2081 2083 # rev = store.rev(node)
2082 2084 # self.linkrev = store.linkrev(rev)
2083 2085
2084 2086 def _storage(self):
2085 2087 narrowmatch = self._manifestlog._narrowmatch
2086 2088 if not narrowmatch.always():
2087 2089 if not narrowmatch.visitdir(self._dir[:-1]):
2088 2090 return excludedmanifestrevlog(self._dir)
2089 2091 return self._manifestlog.getstorage(self._dir)
2090 2092
2091 2093 def read(self):
2092 2094 if self._data is None:
2093 2095 store = self._storage()
2094 2096 if self._node == nullid:
2095 2097 self._data = treemanifest()
2096 2098 # TODO accessing non-public API
2097 2099 elif store._treeondisk:
2098 2100 m = treemanifest(dir=self._dir)
2099 2101
2100 2102 def gettext():
2101 2103 return store.revision(self._node)
2102 2104
2103 2105 def readsubtree(dir, subm):
2104 2106 # Set verify to False since we need to be able to create
2105 2107 # subtrees for trees that don't exist on disk.
2106 2108 return self._manifestlog.get(dir, subm, verify=False).read()
2107 2109
2108 2110 m.read(gettext, readsubtree)
2109 2111 m.setnode(self._node)
2110 2112 self._data = m
2111 2113 else:
2112 2114 if self._node in store.fulltextcache:
2113 2115 text = pycompat.bytestr(store.fulltextcache[self._node])
2114 2116 else:
2115 2117 text = store.revision(self._node)
2116 2118 arraytext = bytearray(text)
2117 2119 store.fulltextcache[self._node] = arraytext
2118 2120 self._data = treemanifest(dir=self._dir, text=text)
2119 2121
2120 2122 return self._data
2121 2123
2122 2124 def node(self):
2123 2125 return self._node
2124 2126
2125 2127 def new(self, dir=b''):
2126 2128 return memtreemanifestctx(self._manifestlog, dir=dir)
2127 2129
2128 2130 def copy(self):
2129 2131 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2130 2132 memmf._treemanifest = self.read().copy()
2131 2133 return memmf
2132 2134
2133 2135 @propertycache
2134 2136 def parents(self):
2135 2137 return self._storage().parents(self._node)
2136 2138
2137 2139 def readdelta(self, shallow=False):
2138 2140 '''Returns a manifest containing just the entries that are present
2139 2141 in this manifest, but not in its p1 manifest. This is efficient to read
2140 2142 if the revlog delta is already against p1.
2141 2143
2142 2144 If `shallow` is True, this will read the delta for this directory,
2143 2145 without recursively reading subdirectory manifests. Instead, any
2144 2146 subdirectory entry will be reported as it appears in the manifest, i.e.
2145 2147 the subdirectory will be reported among files and distinguished only by
2146 2148 its 't' flag.
2147 2149 '''
2148 2150 store = self._storage()
2149 2151 if shallow:
2150 2152 r = store.rev(self._node)
2151 2153 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2152 2154 return manifestdict(d)
2153 2155 else:
2154 2156 # Need to perform a slow delta
2155 2157 r0 = store.deltaparent(store.rev(self._node))
2156 2158 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2157 2159 m1 = self.read()
2158 2160 md = treemanifest(dir=self._dir)
2159 2161 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2160 2162 if n1:
2161 2163 md[f] = n1
2162 2164 if fl1:
2163 2165 md.setflag(f, fl1)
2164 2166 return md
2165 2167
2166 2168 def readfast(self, shallow=False):
2167 2169 '''Calls either readdelta or read, based on which would be less work.
2168 2170 readdelta is called if the delta is against p1, and therefore can be
2169 2171 read quickly.
2170 2172
2171 2173 If `shallow` is True, it only returns the entries from this manifest,
2172 2174 and not any submanifests.
2173 2175 '''
2174 2176 store = self._storage()
2175 2177 r = store.rev(self._node)
2176 2178 deltaparent = store.deltaparent(r)
2177 2179 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2178 2180 return self.readdelta(shallow=shallow)
2179 2181
2180 2182 if shallow:
2181 2183 return manifestdict(store.revision(self._node))
2182 2184 else:
2183 2185 return self.read()
2184 2186
2185 2187 def find(self, key):
2186 2188 return self.read().find(key)
2187 2189
2188 2190
2189 2191 class excludeddir(treemanifest):
2190 2192 """Stand-in for a directory that is excluded from the repository.
2191 2193
2192 2194 With narrowing active on a repository that uses treemanifests,
2193 2195 some of the directory revlogs will be excluded from the resulting
2194 2196 clone. This is a huge storage win for clients, but means we need
2195 2197 some sort of pseudo-manifest to surface to internals so we can
2196 2198 detect a merge conflict outside the narrowspec. That's what this
2197 2199 class is: it stands in for a directory whose node is known, but
2198 2200 whose contents are unknown.
2199 2201 """
2200 2202
2201 2203 def __init__(self, dir, node):
2202 2204 super(excludeddir, self).__init__(dir)
2203 2205 self._node = node
2204 2206 # Add an empty file, which will be included by iterators and such,
2205 2207 # appearing as the directory itself (i.e. something like "dir/")
2206 2208 self._files[b''] = node
2207 2209 self._flags[b''] = b't'
2208 2210
2209 2211 # Manifests outside the narrowspec should never be modified, so avoid
2210 2212 # copying. This makes a noticeable difference when there are very many
2211 2213 # directories outside the narrowspec. Also, it makes sense for the copy to
2212 2214 # be of the same type as the original, which would not happen with the
2213 2215 # super type's copy().
2214 2216 def copy(self):
2215 2217 return self
2216 2218
2217 2219
2218 2220 class excludeddirmanifestctx(treemanifestctx):
2219 2221 """context wrapper for excludeddir - see that docstring for rationale"""
2220 2222
2221 2223 def __init__(self, dir, node):
2222 2224 self._dir = dir
2223 2225 self._node = node
2224 2226
2225 2227 def read(self):
2226 2228 return excludeddir(self._dir, self._node)
2227 2229
2228 2230 def write(self, *args):
2229 2231 raise error.ProgrammingError(
2230 2232 b'attempt to write manifest from excluded dir %s' % self._dir
2231 2233 )
2232 2234
2233 2235
2234 2236 class excludedmanifestrevlog(manifestrevlog):
2235 2237 """Stand-in for excluded treemanifest revlogs.
2236 2238
2237 2239 When narrowing is active on a treemanifest repository, we'll have
2238 2240 references to directories we can't see due to the revlog being
2239 2241 skipped. This class exists to conform to the manifestrevlog
2240 2242 interface for those directories and proactively prevent writes to
2241 2243 outside the narrowspec.
2242 2244 """
2243 2245
2244 2246 def __init__(self, dir):
2245 2247 self._dir = dir
2246 2248
2247 2249 def __len__(self):
2248 2250 raise error.ProgrammingError(
2249 2251 b'attempt to get length of excluded dir %s' % self._dir
2250 2252 )
2251 2253
2252 2254 def rev(self, node):
2253 2255 raise error.ProgrammingError(
2254 2256 b'attempt to get rev from excluded dir %s' % self._dir
2255 2257 )
2256 2258
2257 2259 def linkrev(self, node):
2258 2260 raise error.ProgrammingError(
2259 2261 b'attempt to get linkrev from excluded dir %s' % self._dir
2260 2262 )
2261 2263
2262 2264 def node(self, rev):
2263 2265 raise error.ProgrammingError(
2264 2266 b'attempt to get node from excluded dir %s' % self._dir
2265 2267 )
2266 2268
2267 2269 def add(self, *args, **kwargs):
2268 2270 # We should never write entries in dirlogs outside the narrow clone.
2269 2271 # However, the method still gets called from writesubtree() in
2270 2272 # _addtree(), so we need to handle it. We should possibly make
2271 2273 # writesubtree() avoid calling add() with a clean manifest (_dirty
2272 2274 # is always False in excludeddir instances).
2273 2275 pass
@@ -1,3226 +1,3227 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import collections
12 12 import contextlib
13 13 import copy
14 14 import errno
15 15 import os
16 16 import re
17 17 import shutil
18 18 import zlib
19 19
20 20 from .i18n import _
21 21 from .node import (
22 22 hex,
23 23 short,
24 24 )
25 25 from .pycompat import open
26 26 from . import (
27 27 copies,
28 28 diffhelper,
29 29 diffutil,
30 30 encoding,
31 31 error,
32 32 mail,
33 33 mdiff,
34 34 pathutil,
35 35 pycompat,
36 36 scmutil,
37 37 similar,
38 38 util,
39 39 vfs as vfsmod,
40 40 )
41 41 from .utils import (
42 42 dateutil,
43 43 hashutil,
44 44 procutil,
45 45 stringutil,
46 46 )
47 47
48 48 stringio = util.stringio
49 49
50 50 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
51 51 tabsplitter = re.compile(br'(\t+|[^\t]+)')
52 52 wordsplitter = re.compile(
53 53 br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
54 54 )
55 55
56 56 PatchError = error.PatchError
57 57
58 58 # public functions
59 59
60 60
61 61 def split(stream):
62 62 '''return an iterator of individual patches from a stream'''
63 63
64 64 def isheader(line, inheader):
65 65 if inheader and line.startswith((b' ', b'\t')):
66 66 # continuation
67 67 return True
68 68 if line.startswith((b' ', b'-', b'+')):
69 69 # diff line - don't check for header pattern in there
70 70 return False
71 71 l = line.split(b': ', 1)
72 72 return len(l) == 2 and b' ' not in l[0]
73 73
74 74 def chunk(lines):
75 75 return stringio(b''.join(lines))
76 76
77 77 def hgsplit(stream, cur):
78 78 inheader = True
79 79
80 80 for line in stream:
81 81 if not line.strip():
82 82 inheader = False
83 83 if not inheader and line.startswith(b'# HG changeset patch'):
84 84 yield chunk(cur)
85 85 cur = []
86 86 inheader = True
87 87
88 88 cur.append(line)
89 89
90 90 if cur:
91 91 yield chunk(cur)
92 92
93 93 def mboxsplit(stream, cur):
94 94 for line in stream:
95 95 if line.startswith(b'From '):
96 96 for c in split(chunk(cur[1:])):
97 97 yield c
98 98 cur = []
99 99
100 100 cur.append(line)
101 101
102 102 if cur:
103 103 for c in split(chunk(cur[1:])):
104 104 yield c
105 105
106 106 def mimesplit(stream, cur):
107 107 def msgfp(m):
108 108 fp = stringio()
109 109 g = mail.Generator(fp, mangle_from_=False)
110 110 g.flatten(m)
111 111 fp.seek(0)
112 112 return fp
113 113
114 114 for line in stream:
115 115 cur.append(line)
116 116 c = chunk(cur)
117 117
118 118 m = mail.parse(c)
119 119 if not m.is_multipart():
120 120 yield msgfp(m)
121 121 else:
122 122 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
123 123 for part in m.walk():
124 124 ct = part.get_content_type()
125 125 if ct not in ok_types:
126 126 continue
127 127 yield msgfp(part)
128 128
129 129 def headersplit(stream, cur):
130 130 inheader = False
131 131
132 132 for line in stream:
133 133 if not inheader and isheader(line, inheader):
134 134 yield chunk(cur)
135 135 cur = []
136 136 inheader = True
137 137 if inheader and not isheader(line, inheader):
138 138 inheader = False
139 139
140 140 cur.append(line)
141 141
142 142 if cur:
143 143 yield chunk(cur)
144 144
145 145 def remainder(cur):
146 146 yield chunk(cur)
147 147
148 148 class fiter(object):
149 149 def __init__(self, fp):
150 150 self.fp = fp
151 151
152 152 def __iter__(self):
153 153 return self
154 154
155 155 def next(self):
156 156 l = self.fp.readline()
157 157 if not l:
158 158 raise StopIteration
159 159 return l
160 160
161 161 __next__ = next
162 162
163 163 inheader = False
164 164 cur = []
165 165
166 166 mimeheaders = [b'content-type']
167 167
168 168 if not util.safehasattr(stream, b'next'):
169 169 # http responses, for example, have readline but not next
170 170 stream = fiter(stream)
171 171
172 172 for line in stream:
173 173 cur.append(line)
174 174 if line.startswith(b'# HG changeset patch'):
175 175 return hgsplit(stream, cur)
176 176 elif line.startswith(b'From '):
177 177 return mboxsplit(stream, cur)
178 178 elif isheader(line, inheader):
179 179 inheader = True
180 180 if line.split(b':', 1)[0].lower() in mimeheaders:
181 181 # let email parser handle this
182 182 return mimesplit(stream, cur)
183 183 elif line.startswith(b'--- ') and inheader:
184 184 # No headers requiring the email parser seen before the diff starts; split by hand
185 185 return headersplit(stream, cur)
186 186 # Not enough info, keep reading
187 187
188 188 # if we are here, we have a very plain patch
189 189 return remainder(cur)
190 190
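# Minimal usage sketch for split() (illustrative; `fp` is an assumed
# binary file-like object):
#
#   with open('patches.mbox', 'rb') as fp:
#       for chunkfp in split(fp):
#           payload = chunkfp.read()   # one patch per stringio chunk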
191 191
192 192 ## Some facility for extensible patch parsing:
193 193 # list of pairs ("header to match", "data key")
194 194 patchheadermap = [
195 195 (b'Date', b'date'),
196 196 (b'Branch', b'branch'),
197 197 (b'Node ID', b'nodeid'),
198 198 ]
199 199
200 200
201 201 @contextlib.contextmanager
202 202 def extract(ui, fileobj):
203 203 '''extract patch from data read from fileobj.
204 204
205 205 patch can be a normal patch or contained in an email message.
206 206
207 207 return a dictionary. Standard keys are:
208 208 - filename,
209 209 - message,
210 210 - user,
211 211 - date,
212 212 - branch,
213 213 - node,
214 214 - p1,
215 215 - p2.
216 216 Any item can be missing from the dictionary. If filename is missing,
217 217 fileobj did not contain a patch. Caller must unlink filename when done.'''
218 218
219 219 fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
220 220 tmpfp = os.fdopen(fd, 'wb')
221 221 try:
222 222 yield _extract(ui, fileobj, tmpname, tmpfp)
223 223 finally:
224 224 tmpfp.close()
225 225 os.unlink(tmpname)
226 226
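# Usage sketch for the extract() context manager (illustrative; `ui` and
# `fileobj` are assumed):
#
#   with extract(ui, fileobj) as data:
#       if b'filename' in data:          # fileobj contained a patch
#           consume(data[b'filename'])   # `consume` is a hypothetical caller
#   # the temporary patch file is unlinked when the with-block exits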
227 227
228 228 def _extract(ui, fileobj, tmpname, tmpfp):
229 229
230 230 # attempt to detect the start of a patch
231 231 # (this heuristic is borrowed from quilt)
232 232 diffre = re.compile(
233 233 br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
234 234 br'retrieving revision [0-9]+(\.[0-9]+)*$|'
235 235 br'---[ \t].*?^\+\+\+[ \t]|'
236 236 br'\*\*\*[ \t].*?^---[ \t])',
237 237 re.MULTILINE | re.DOTALL,
238 238 )
239 239
240 240 data = {}
241 241
242 242 msg = mail.parse(fileobj)
243 243
244 244 subject = msg['Subject'] and mail.headdecode(msg['Subject'])
245 245 data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
246 246 if not subject and not data[b'user']:
247 247 # Not an email, restore parsed headers if any
248 248 subject = (
249 249 b'\n'.join(
250 250 b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
251 251 )
252 252 + b'\n'
253 253 )
254 254
255 255 # should try to parse msg['Date']
256 256 parents = []
257 257
258 258 nodeid = msg['X-Mercurial-Node']
259 259 if nodeid:
260 260 data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
261 261 ui.debug(b'Node ID: %s\n' % nodeid)
262 262
263 263 if subject:
264 264 if subject.startswith(b'[PATCH'):
265 265 pend = subject.find(b']')
266 266 if pend >= 0:
267 267 subject = subject[pend + 1 :].lstrip()
268 268 subject = re.sub(br'\n[ \t]+', b' ', subject)
269 269 ui.debug(b'Subject: %s\n' % subject)
270 270 if data[b'user']:
271 271 ui.debug(b'From: %s\n' % data[b'user'])
272 272 diffs_seen = 0
273 273 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
274 274 message = b''
275 275 for part in msg.walk():
276 276 content_type = pycompat.bytestr(part.get_content_type())
277 277 ui.debug(b'Content-Type: %s\n' % content_type)
278 278 if content_type not in ok_types:
279 279 continue
280 280 payload = part.get_payload(decode=True)
281 281 m = diffre.search(payload)
282 282 if m:
283 283 hgpatch = False
284 284 hgpatchheader = False
285 285 ignoretext = False
286 286
287 287 ui.debug(b'found patch at byte %d\n' % m.start(0))
288 288 diffs_seen += 1
289 289 cfp = stringio()
290 290 for line in payload[: m.start(0)].splitlines():
291 291 if line.startswith(b'# HG changeset patch') and not hgpatch:
292 292 ui.debug(b'patch generated by hg export\n')
293 293 hgpatch = True
294 294 hgpatchheader = True
295 295 # drop earlier commit message content
296 296 cfp.seek(0)
297 297 cfp.truncate()
298 298 subject = None
299 299 elif hgpatchheader:
300 300 if line.startswith(b'# User '):
301 301 data[b'user'] = line[7:]
302 302 ui.debug(b'From: %s\n' % data[b'user'])
303 303 elif line.startswith(b"# Parent "):
304 304 parents.append(line[9:].lstrip())
305 305 elif line.startswith(b"# "):
306 306 for header, key in patchheadermap:
307 307 prefix = b'# %s ' % header
308 308 if line.startswith(prefix):
309 309 data[key] = line[len(prefix) :]
310 310 ui.debug(b'%s: %s\n' % (header, data[key]))
311 311 else:
312 312 hgpatchheader = False
313 313 elif line == b'---':
314 314 ignoretext = True
315 315 if not hgpatchheader and not ignoretext:
316 316 cfp.write(line)
317 317 cfp.write(b'\n')
318 318 message = cfp.getvalue()
319 319 if tmpfp:
320 320 tmpfp.write(payload)
321 321 if not payload.endswith(b'\n'):
322 322 tmpfp.write(b'\n')
323 323 elif not diffs_seen and message and content_type == b'text/plain':
324 324 message += b'\n' + payload
325 325
326 326 if subject and not message.startswith(subject):
327 327 message = b'%s\n%s' % (subject, message)
328 328 data[b'message'] = message
329 329 tmpfp.close()
330 330 if parents:
331 331 data[b'p1'] = parents.pop(0)
332 332 if parents:
333 333 data[b'p2'] = parents.pop(0)
334 334
335 335 if diffs_seen:
336 336 data[b'filename'] = tmpname
337 337
338 338 return data
339 339
340 340
341 341 class patchmeta(object):
342 342 """Patched file metadata
343 343
344 344 'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
345 345 or COPY. 'path' is patched file path. 'oldpath' is set to the
346 346 origin file when 'op' is either COPY or RENAME, None otherwise. If
347 347 file mode is changed, 'mode' is a tuple (islink, isexec) where
348 348 'islink' is True if the file is a symlink and 'isexec' is True if
349 349 the file is executable. Otherwise, 'mode' is None.
350 350 """
351 351
352 352 def __init__(self, path):
353 353 self.path = path
354 354 self.oldpath = None
355 355 self.mode = None
356 356 self.op = b'MODIFY'
357 357 self.binary = False
358 358
359 359 def setmode(self, mode):
360 360 islink = mode & 0o20000
361 361 isexec = mode & 0o100
362 362 self.mode = (islink, isexec)
363 363
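# Worked example for setmode() (values follow the bit masks above):
#
#   pm = patchmeta(b'foo')
#   pm.setmode(0o100755)   # executable file -> pm.mode == (0, 0o100)
#   pm.setmode(0o120000)   # symlink -> the islink bit 0o20000 is set
#
# Note the tuple holds the raw masked bits, which are truthy rather than
# normalized booleans.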
364 364 def copy(self):
365 365 other = patchmeta(self.path)
366 366 other.oldpath = self.oldpath
367 367 other.mode = self.mode
368 368 other.op = self.op
369 369 other.binary = self.binary
370 370 return other
371 371
372 372 def _ispatchinga(self, afile):
373 373 if afile == b'/dev/null':
374 374 return self.op == b'ADD'
375 375 return afile == b'a/' + (self.oldpath or self.path)
376 376
377 377 def _ispatchingb(self, bfile):
378 378 if bfile == b'/dev/null':
379 379 return self.op == b'DELETE'
380 380 return bfile == b'b/' + self.path
381 381
382 382 def ispatching(self, afile, bfile):
383 383 return self._ispatchinga(afile) and self._ispatchingb(bfile)
384 384
385 385 def __repr__(self):
386 386 return "<patchmeta %s %r>" % (self.op, self.path)
387 387
388 388
389 389 def readgitpatch(lr):
390 390 """extract git-style metadata about patches from <patchname>"""
391 391
392 392 # Filter patch for git information
393 393 gp = None
394 394 gitpatches = []
395 395 for line in lr:
396 396 line = line.rstrip(b' \r\n')
397 397 if line.startswith(b'diff --git a/'):
398 398 m = gitre.match(line)
399 399 if m:
400 400 if gp:
401 401 gitpatches.append(gp)
402 402 dst = m.group(2)
403 403 gp = patchmeta(dst)
404 404 elif gp:
405 405 if line.startswith(b'--- '):
406 406 gitpatches.append(gp)
407 407 gp = None
408 408 continue
409 409 if line.startswith(b'rename from '):
410 410 gp.op = b'RENAME'
411 411 gp.oldpath = line[12:]
412 412 elif line.startswith(b'rename to '):
413 413 gp.path = line[10:]
414 414 elif line.startswith(b'copy from '):
415 415 gp.op = b'COPY'
416 416 gp.oldpath = line[10:]
417 417 elif line.startswith(b'copy to '):
418 418 gp.path = line[8:]
419 419 elif line.startswith(b'deleted file'):
420 420 gp.op = b'DELETE'
421 421 elif line.startswith(b'new file mode '):
422 422 gp.op = b'ADD'
423 423 gp.setmode(int(line[-6:], 8))
424 424 elif line.startswith(b'new mode '):
425 425 gp.setmode(int(line[-6:], 8))
426 426 elif line.startswith(b'GIT binary patch'):
427 427 gp.binary = True
428 428 if gp:
429 429 gitpatches.append(gp)
430 430
431 431 return gitpatches
432 432
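# Parsing sketch (illustrative; minimal git-style input assumed, using the
# linereader class defined just below):
#
#   lr = linereader(stringio(b'diff --git a/old b/new\n'
#                            b'rename from old\n'
#                            b'rename to new\n'))
#   [gp] = readgitpatch(lr)
#   # gp.op == b'RENAME', gp.oldpath == b'old', gp.path == b'new'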
433 433
434 434 class linereader(object):
435 435 # simple class to allow pushing lines back into the input stream
436 436 def __init__(self, fp):
437 437 self.fp = fp
438 438 self.buf = []
439 439
440 440 def push(self, line):
441 441 if line is not None:
442 442 self.buf.append(line)
443 443
444 444 def readline(self):
445 445 if self.buf:
446 446 l = self.buf[0]
447 447 del self.buf[0]
448 448 return l
449 449 return self.fp.readline()
450 450
451 451 def __iter__(self):
452 452 return iter(self.readline, b'')
453 453
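# Pushback sketch (illustrative): lines pushed back are returned before
# the underlying file is read again:
#
#   lr = linereader(fp)     # `fp` is an assumed binary file object
#   l = lr.readline()
#   lr.push(l)              # un-read it
#   assert lr.readline() == l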
454 454
455 455 class abstractbackend(object):
456 456 def __init__(self, ui):
457 457 self.ui = ui
458 458
459 459 def getfile(self, fname):
460 460 """Return target file data and flags as a (data, (islink,
461 461 isexec)) tuple. Data is None if file is missing/deleted.
462 462 """
463 463 raise NotImplementedError
464 464
465 465 def setfile(self, fname, data, mode, copysource):
466 466 """Write data to target file fname and set its mode. mode is a
467 467 (islink, isexec) tuple. If data is None, the file content should
468 468 be left unchanged. If the file is modified after being copied,
469 469 copysource is set to the original file name.
470 470 """
471 471 raise NotImplementedError
472 472
473 473 def unlink(self, fname):
474 474 """Unlink target file."""
475 475 raise NotImplementedError
476 476
477 477 def writerej(self, fname, failed, total, lines):
478 478 """Write rejected lines for fname. total is the number of hunks
479 479 which failed to apply and total the total number of hunks for this
480 480 files.
481 481 """
482 482
483 483 def exists(self, fname):
484 484 raise NotImplementedError
485 485
486 486 def close(self):
487 487 raise NotImplementedError
488 488
489 489
490 490 class fsbackend(abstractbackend):
491 491 def __init__(self, ui, basedir):
492 492 super(fsbackend, self).__init__(ui)
493 493 self.opener = vfsmod.vfs(basedir)
494 494
495 495 def getfile(self, fname):
496 496 if self.opener.islink(fname):
497 497 return (self.opener.readlink(fname), (True, False))
498 498
499 499 isexec = False
500 500 try:
501 501 isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
502 502 except OSError as e:
503 503 if e.errno != errno.ENOENT:
504 504 raise
505 505 try:
506 506 return (self.opener.read(fname), (False, isexec))
507 507 except IOError as e:
508 508 if e.errno != errno.ENOENT:
509 509 raise
510 510 return None, None
511 511
512 512 def setfile(self, fname, data, mode, copysource):
513 513 islink, isexec = mode
514 514 if data is None:
515 515 self.opener.setflags(fname, islink, isexec)
516 516 return
517 517 if islink:
518 518 self.opener.symlink(data, fname)
519 519 else:
520 520 self.opener.write(fname, data)
521 521 if isexec:
522 522 self.opener.setflags(fname, False, True)
523 523
524 524 def unlink(self, fname):
525 525 rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
526 526 self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)
527 527
528 528 def writerej(self, fname, failed, total, lines):
529 529 fname = fname + b".rej"
530 530 self.ui.warn(
531 531 _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
532 532 % (failed, total, fname)
533 533 )
534 534 fp = self.opener(fname, b'w')
535 535 fp.writelines(lines)
536 536 fp.close()
537 537
538 538 def exists(self, fname):
539 539 return self.opener.lexists(fname)
540 540
541 541
542 542 class workingbackend(fsbackend):
543 543 def __init__(self, ui, repo, similarity):
544 544 super(workingbackend, self).__init__(ui, repo.root)
545 545 self.repo = repo
546 546 self.similarity = similarity
547 547 self.removed = set()
548 548 self.changed = set()
549 549 self.copied = []
550 550
551 551 def _checkknown(self, fname):
552 552 if self.repo.dirstate[fname] == b'?' and self.exists(fname):
553 553 raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)
554 554
555 555 def setfile(self, fname, data, mode, copysource):
556 556 self._checkknown(fname)
557 557 super(workingbackend, self).setfile(fname, data, mode, copysource)
558 558 if copysource is not None:
559 559 self.copied.append((copysource, fname))
560 560 self.changed.add(fname)
561 561
562 562 def unlink(self, fname):
563 563 self._checkknown(fname)
564 564 super(workingbackend, self).unlink(fname)
565 565 self.removed.add(fname)
566 566 self.changed.add(fname)
567 567
568 568 def close(self):
569 569 wctx = self.repo[None]
570 570 changed = set(self.changed)
571 571 for src, dst in self.copied:
572 572 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
573 573 if self.removed:
574 574 wctx.forget(sorted(self.removed))
575 575 for f in self.removed:
576 576 if f not in self.repo.dirstate:
577 577 # File was deleted and no longer belongs to the
578 578 # dirstate, it was probably marked added then
579 579 # deleted, and should not be considered by
580 580 # marktouched().
581 581 changed.discard(f)
582 582 if changed:
583 583 scmutil.marktouched(self.repo, changed, self.similarity)
584 584 return sorted(self.changed)
585 585
586 586
587 587 class filestore(object):
588 588 def __init__(self, maxsize=None):
589 589 self.opener = None
590 590 self.files = {}
591 591 self.created = 0
592 592 self.maxsize = maxsize
593 593 if self.maxsize is None:
594 594 self.maxsize = 4 * (2 ** 20)
595 595 self.size = 0
596 596 self.data = {}
597 597
598 598 def setfile(self, fname, data, mode, copied=None):
599 599 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
600 600 self.data[fname] = (data, mode, copied)
601 601 self.size += len(data)
602 602 else:
603 603 if self.opener is None:
604 604 root = pycompat.mkdtemp(prefix=b'hg-patch-')
605 605 self.opener = vfsmod.vfs(root)
606 606 # Avoid filename issues with these simple names
607 607 fn = b'%d' % self.created
608 608 self.opener.write(fn, data)
609 609 self.created += 1
610 610 self.files[fname] = (fn, mode, copied)
611 611
612 612 def getfile(self, fname):
613 613 if fname in self.data:
614 614 return self.data[fname]
615 615 if not self.opener or fname not in self.files:
616 616 return None, None, None
617 617 fn, mode, copied = self.files[fname]
618 618 return self.opener.read(fn), mode, copied
619 619
620 620 def close(self):
621 621 if self.opener:
622 622 shutil.rmtree(self.opener.base)
623 623
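# Behavioural sketch for filestore (illustrative): small payloads stay in
# memory; anything that would push the running total past maxsize spills
# to a temporary directory, which close() removes.
#
#   store = filestore(maxsize=16)
#   store.setfile(b'a', b'tiny', (False, False))      # kept in self.data
#   store.setfile(b'b', b'x' * 1024, (False, False))  # spilled to disk
#   data, mode, copied = store.getfile(b'b')
#   store.close()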
624 624
625 625 class repobackend(abstractbackend):
626 626 def __init__(self, ui, repo, ctx, store):
627 627 super(repobackend, self).__init__(ui)
628 628 self.repo = repo
629 629 self.ctx = ctx
630 630 self.store = store
631 631 self.changed = set()
632 632 self.removed = set()
633 633 self.copied = {}
634 634
635 635 def _checkknown(self, fname):
636 636 if fname not in self.ctx:
637 637 raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)
638 638
639 639 def getfile(self, fname):
640 640 try:
641 641 fctx = self.ctx[fname]
642 642 except error.LookupError:
643 643 return None, None
644 644 flags = fctx.flags()
645 645 return fctx.data(), (b'l' in flags, b'x' in flags)
646 646
647 647 def setfile(self, fname, data, mode, copysource):
648 648 if copysource:
649 649 self._checkknown(copysource)
650 650 if data is None:
651 651 data = self.ctx[fname].data()
652 652 self.store.setfile(fname, data, mode, copysource)
653 653 self.changed.add(fname)
654 654 if copysource:
655 655 self.copied[fname] = copysource
656 656
657 657 def unlink(self, fname):
658 658 self._checkknown(fname)
659 659 self.removed.add(fname)
660 660
661 661 def exists(self, fname):
662 662 return fname in self.ctx
663 663
664 664 def close(self):
665 665 return self.changed | self.removed
666 666
667 667
668 668 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
669 669 unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
670 670 contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
671 671 eolmodes = [b'strict', b'crlf', b'lf', b'auto']
672 672
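# Match sketch for the hunk descriptor regexes above (groups verifiable
# by inspection):
#
#   unidesc.match(b'@@ -1,5 +1,6 @@').groups()
#   # -> (b'1', b'5', b'1', b'6')
#   unidesc.match(b'@@ -10 +11 @@').groups()
#   # -> (b'10', None, b'11', None)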
673 673
674 674 class patchfile(object):
675 675 def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
676 676 self.fname = gp.path
677 677 self.eolmode = eolmode
678 678 self.eol = None
679 679 self.backend = backend
680 680 self.ui = ui
681 681 self.lines = []
682 682 self.exists = False
683 683 self.missing = True
684 684 self.mode = gp.mode
685 685 self.copysource = gp.oldpath
686 686 self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
687 687 self.remove = gp.op == b'DELETE'
688 688 if self.copysource is None:
689 689 data, mode = backend.getfile(self.fname)
690 690 else:
691 691 data, mode = store.getfile(self.copysource)[:2]
692 692 if data is not None:
693 693 self.exists = self.copysource is None or backend.exists(self.fname)
694 694 self.missing = False
695 695 if data:
696 696 self.lines = mdiff.splitnewlines(data)
697 697 if self.mode is None:
698 698 self.mode = mode
699 699 if self.lines:
700 700 # Normalize line endings
701 701 if self.lines[0].endswith(b'\r\n'):
702 702 self.eol = b'\r\n'
703 703 elif self.lines[0].endswith(b'\n'):
704 704 self.eol = b'\n'
705 705 if eolmode != b'strict':
706 706 nlines = []
707 707 for l in self.lines:
708 708 if l.endswith(b'\r\n'):
709 709 l = l[:-2] + b'\n'
710 710 nlines.append(l)
711 711 self.lines = nlines
712 712 else:
713 713 if self.create:
714 714 self.missing = False
715 715 if self.mode is None:
716 716 self.mode = (False, False)
717 717 if self.missing:
718 718 self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
719 719 self.ui.warn(
720 720 _(
721 721 b"(use '--prefix' to apply patch relative to the "
722 722 b"current directory)\n"
723 723 )
724 724 )
725 725
726 726 self.hash = {}
727 727 self.dirty = 0
728 728 self.offset = 0
729 729 self.skew = 0
730 730 self.rej = []
731 731 self.fileprinted = False
732 732 self.printfile(False)
733 733 self.hunks = 0
734 734
735 735 def writelines(self, fname, lines, mode):
736 736 if self.eolmode == b'auto':
737 737 eol = self.eol
738 738 elif self.eolmode == b'crlf':
739 739 eol = b'\r\n'
740 740 else:
741 741 eol = b'\n'
742 742
743 743 if self.eolmode != b'strict' and eol and eol != b'\n':
744 744 rawlines = []
745 745 for l in lines:
746 746 if l and l.endswith(b'\n'):
747 747 l = l[:-1] + eol
748 748 rawlines.append(l)
749 749 lines = rawlines
750 750
751 751 self.backend.setfile(fname, b''.join(lines), mode, self.copysource)
752 752
753 753 def printfile(self, warn):
754 754 if self.fileprinted:
755 755 return
756 756 if warn or self.ui.verbose:
757 757 self.fileprinted = True
758 758 s = _(b"patching file %s\n") % self.fname
759 759 if warn:
760 760 self.ui.warn(s)
761 761 else:
762 762 self.ui.note(s)
763 763
764 764 def findlines(self, l, linenum):
765 765 # looks through the hash and finds candidate lines. The
766 766 # result is a list of line numbers sorted based on distance
767 767 # from linenum
768 768
769 769 cand = self.hash.get(l, [])
770 770 if len(cand) > 1:
771 771 # re-sort our list of candidate lines by distance from linenum.
772 772 cand.sort(key=lambda x: abs(x - linenum))
773 773 return cand
774 774
775 775 def write_rej(self):
776 776 # our rejects are a little different from patch(1). This always
777 777 # creates rejects in the same form as the original patch. A file
778 778 # header is inserted so that you can run the reject through patch again
779 779 # without having to type the filename.
780 780 if not self.rej:
781 781 return
782 782 base = os.path.basename(self.fname)
783 783 lines = [b"--- %s\n+++ %s\n" % (base, base)]
784 784 for x in self.rej:
785 785 for l in x.hunk:
786 786 lines.append(l)
787 787 if l[-1:] != b'\n':
788 788 lines.append(b"\n\\ No newline at end of file\n")
789 789 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
790 790
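# Shape of a reject file written above (illustrative): for a file `foo`
# with one failed hunk, foo.rej would resemble
#
#   --- foo
#   +++ foo
#   @@ -1,3 +1,3 @@
#    unchanged context
#   -old line
#   +new line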
791 791 def apply(self, h):
792 792 if not h.complete():
793 793 raise PatchError(
794 794 _(b"bad hunk #%d %s (%d %d %d %d)")
795 795 % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
796 796 )
797 797
798 798 self.hunks += 1
799 799
800 800 if self.missing:
801 801 self.rej.append(h)
802 802 return -1
803 803
804 804 if self.exists and self.create:
805 805 if self.copysource:
806 806 self.ui.warn(
807 807 _(b"cannot create %s: destination already exists\n")
808 808 % self.fname
809 809 )
810 810 else:
811 811 self.ui.warn(_(b"file %s already exists\n") % self.fname)
812 812 self.rej.append(h)
813 813 return -1
814 814
815 815 if isinstance(h, binhunk):
816 816 if self.remove:
817 817 self.backend.unlink(self.fname)
818 818 else:
819 819 l = h.new(self.lines)
820 820 self.lines[:] = l
821 821 self.offset += len(l)
822 822 self.dirty = True
823 823 return 0
824 824
825 825 horig = h
826 826 if (
827 827 self.eolmode in (b'crlf', b'lf')
828 828 or self.eolmode == b'auto'
829 829 and self.eol
830 830 ):
831 831 # If new eols are going to be normalized, then normalize
832 832 # hunk data before patching. Otherwise, preserve input
833 833 # line-endings.
834 834 h = h.getnormalized()
835 835
836 836 # fast case first, no offsets, no fuzz
837 837 old, oldstart, new, newstart = h.fuzzit(0, False)
838 838 oldstart += self.offset
839 839 orig_start = oldstart
840 840 # if there's skew we want to emit the "(offset %d lines)" even
841 841 # when the hunk cleanly applies at start + skew, so skip the
842 842 # fast case code
843 843 if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
844 844 if self.remove:
845 845 self.backend.unlink(self.fname)
846 846 else:
847 847 self.lines[oldstart : oldstart + len(old)] = new
848 848 self.offset += len(new) - len(old)
849 849 self.dirty = True
850 850 return 0
851 851
852 852 # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
853 853 self.hash = {}
854 854 for x, s in enumerate(self.lines):
855 855 self.hash.setdefault(s, []).append(x)
856 856
857 857 for fuzzlen in pycompat.xrange(
858 858 self.ui.configint(b"patch", b"fuzz") + 1
859 859 ):
860 860 for toponly in [True, False]:
861 861 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
862 862 oldstart = oldstart + self.offset + self.skew
863 863 oldstart = min(oldstart, len(self.lines))
864 864 if old:
865 865 cand = self.findlines(old[0][1:], oldstart)
866 866 else:
867 867 # Only adding lines with no or fuzzed context, just
868 868 # take the skew into account
869 869 cand = [oldstart]
870 870
871 871 for l in cand:
872 872 if not old or diffhelper.testhunk(old, self.lines, l):
873 873 self.lines[l : l + len(old)] = new
874 874 self.offset += len(new) - len(old)
875 875 self.skew = l - orig_start
876 876 self.dirty = True
877 877 offset = l - orig_start - fuzzlen
878 878 if fuzzlen:
879 879 msg = _(
880 880 b"Hunk #%d succeeded at %d "
881 881 b"with fuzz %d "
882 882 b"(offset %d lines).\n"
883 883 )
884 884 self.printfile(True)
885 885 self.ui.warn(
886 886 msg % (h.number, l + 1, fuzzlen, offset)
887 887 )
888 888 else:
889 889 msg = _(
890 890 b"Hunk #%d succeeded at %d "
891 891 b"(offset %d lines).\n"
892 892 )
893 893 self.ui.note(msg % (h.number, l + 1, offset))
894 894 return fuzzlen
895 895 self.printfile(True)
896 896 self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
897 897 self.rej.append(horig)
898 898 return -1
899 899
900 900 def close(self):
901 901 if self.dirty:
902 902 self.writelines(self.fname, self.lines, self.mode)
903 903 self.write_rej()
904 904 return len(self.rej)
905 905
906 906
907 907 class header(object):
908 908 """patch header
909 909 """
910 910
911 911 diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
912 912 diff_re = re.compile(b'diff -r .* (.*)$')
913 913 allhunks_re = re.compile(b'(?:index|deleted file) ')
914 914 pretty_re = re.compile(b'(?:new file|deleted file) ')
915 915 special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
916 916 newfile_re = re.compile(b'(?:new file|copy to|rename to)')
917 917
918 918 def __init__(self, header):
919 919 self.header = header
920 920 self.hunks = []
921 921
922 922 def binary(self):
923 923 return any(h.startswith(b'index ') for h in self.header)
924 924
925 925 def pretty(self, fp):
926 926 for h in self.header:
927 927 if h.startswith(b'index '):
928 928 fp.write(_(b'this modifies a binary file (all or nothing)\n'))
929 929 break
930 930 if self.pretty_re.match(h):
931 931 fp.write(h)
932 932 if self.binary():
933 933 fp.write(_(b'this is a binary file\n'))
934 934 break
935 935 if h.startswith(b'---'):
936 936 fp.write(
937 937 _(b'%d hunks, %d lines changed\n')
938 938 % (
939 939 len(self.hunks),
940 940 sum([max(h.added, h.removed) for h in self.hunks]),
941 941 )
942 942 )
943 943 break
944 944 fp.write(h)
945 945
946 946 def write(self, fp):
947 947 fp.write(b''.join(self.header))
948 948
949 949 def allhunks(self):
950 950 return any(self.allhunks_re.match(h) for h in self.header)
951 951
952 952 def files(self):
953 953 match = self.diffgit_re.match(self.header[0])
954 954 if match:
955 955 fromfile, tofile = match.groups()
956 956 if fromfile == tofile:
957 957 return [fromfile]
958 958 return [fromfile, tofile]
959 959 else:
960 960 return self.diff_re.match(self.header[0]).groups()
961 961
962 962 def filename(self):
963 963 return self.files()[-1]
964 964
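# Parsing sketch for files()/filename() (inputs assumed, matching
# diffgit_re above):
#
#   h = header([b'diff --git a/x b/y\n'])
#   h.files()      # -> [b'x', b'y']   ([b'x'] when both sides are equal)
#   h.filename()   # -> b'y'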
965 965 def __repr__(self):
966 966 return '<header %s>' % (
967 967 ' '.join(pycompat.rapply(pycompat.fsdecode, self.files()))
968 968 )
969 969
970 970 def isnewfile(self):
971 971 return any(self.newfile_re.match(h) for h in self.header)
972 972
973 973 def special(self):
974 974 # Special files are shown only at the header level and not at the hunk
975 975 # level; for example, a file that has been deleted is a special file.
976 976 # The user cannot change the content of the operation: in the case of
977 977 # a deleted file, they have to take the deletion or not take it; they
978 978 # cannot take only some of it.
979 979 # Newly added files are special if they are empty; they are not special
980 980 # if they have some content, as we want to be able to change it.
981 981 nocontent = len(self.header) == 2
982 982 emptynewfile = self.isnewfile() and nocontent
983 983 return emptynewfile or any(
984 984 self.special_re.match(h) for h in self.header
985 985 )
986 986
987 987
988 988 class recordhunk(object):
989 989 """patch hunk
990 990
991 991 XXX shouldn't we merge this with the other hunk class?
992 992 """
993 993
994 994 def __init__(
995 995 self,
996 996 header,
997 997 fromline,
998 998 toline,
999 999 proc,
1000 1000 before,
1001 1001 hunk,
1002 1002 after,
1003 1003 maxcontext=None,
1004 1004 ):
1005 1005 def trimcontext(lines, reverse=False):
1006 1006 if maxcontext is not None:
1007 1007 delta = len(lines) - maxcontext
1008 1008 if delta > 0:
1009 1009 if reverse:
1010 1010 return delta, lines[delta:]
1011 1011 else:
1012 1012 return delta, lines[:maxcontext]
1013 1013 return 0, lines
1014 1014
1015 1015 self.header = header
1016 1016 trimedbefore, self.before = trimcontext(before, True)
1017 1017 self.fromline = fromline + trimedbefore
1018 1018 self.toline = toline + trimedbefore
1019 1019 _trimedafter, self.after = trimcontext(after, False)
1020 1020 self.proc = proc
1021 1021 self.hunk = hunk
1022 1022 self.added, self.removed = self.countchanges(self.hunk)
1023 1023
1024 1024 def __eq__(self, v):
1025 1025 if not isinstance(v, recordhunk):
1026 1026 return False
1027 1027
1028 1028 return (
1029 1029 (v.hunk == self.hunk)
1030 1030 and (v.proc == self.proc)
1031 1031 and (self.fromline == v.fromline)
1032 1032 and (self.header.files() == v.header.files())
1033 1033 )
1034 1034
1035 1035 def __hash__(self):
1036 1036 return hash(
1037 1037 (
1038 1038 tuple(self.hunk),
1039 1039 tuple(self.header.files()),
1040 1040 self.fromline,
1041 1041 self.proc,
1042 1042 )
1043 1043 )
1044 1044
1045 1045 def countchanges(self, hunk):
1046 1046 """hunk -> (n+,n-)"""
1047 1047 add = len([h for h in hunk if h.startswith(b'+')])
1048 1048 rem = len([h for h in hunk if h.startswith(b'-')])
1049 1049 return add, rem
1050 1050
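# Worked example for countchanges (by inspection of the comprehensions
# above): a hunk of [b'+new\n', b'-old\n', b' context\n'] counts one
# added and one removed line, i.e. (1, 1).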
1051 1051 def reversehunk(self):
1052 1052 """return another recordhunk which is the reverse of the hunk
1053 1053
1054 1054 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
1055 1055 that, swap fromline/toline and +/- signs while keep other things
1056 1056 unchanged.
1057 1057 """
1058 1058 m = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
1059 1059 hunk = [b'%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
1060 1060 return recordhunk(
1061 1061 self.header,
1062 1062 self.toline,
1063 1063 self.fromline,
1064 1064 self.proc,
1065 1065 self.before,
1066 1066 hunk,
1067 1067 self.after,
1068 1068 )
1069 1069
1070 1070 def write(self, fp):
1071 1071 delta = len(self.before) + len(self.after)
1072 1072 if self.after and self.after[-1] == b'\\ No newline at end of file\n':
1073 1073 delta -= 1
1074 1074 fromlen = delta + self.removed
1075 1075 tolen = delta + self.added
1076 1076 fp.write(
1077 1077 b'@@ -%d,%d +%d,%d @@%s\n'
1078 1078 % (
1079 1079 self.fromline,
1080 1080 fromlen,
1081 1081 self.toline,
1082 1082 tolen,
1083 1083 self.proc and (b' ' + self.proc),
1084 1084 )
1085 1085 )
1086 1086 fp.write(b''.join(self.before + self.hunk + self.after))
1087 1087
1088 1088 pretty = write
1089 1089
1090 1090 def filename(self):
1091 1091 return self.header.filename()
1092 1092
1093 @encoding.strmethod
1093 1094 def __repr__(self):
1094 1095 return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1095 1096
1096 1097
1097 1098 def getmessages():
1098 1099 return {
1099 1100 b'multiple': {
1100 1101 b'apply': _(b"apply change %d/%d to '%s'?"),
1101 1102 b'discard': _(b"discard change %d/%d to '%s'?"),
1102 1103 b'keep': _(b"keep change %d/%d to '%s'?"),
1103 1104 b'record': _(b"record change %d/%d to '%s'?"),
1104 1105 },
1105 1106 b'single': {
1106 1107 b'apply': _(b"apply this change to '%s'?"),
1107 1108 b'discard': _(b"discard this change to '%s'?"),
1108 1109 b'keep': _(b"keep this change to '%s'?"),
1109 1110 b'record': _(b"record this change to '%s'?"),
1110 1111 },
1111 1112 b'help': {
1112 1113 b'apply': _(
1113 1114 b'[Ynesfdaq?]'
1114 1115 b'$$ &Yes, apply this change'
1115 1116 b'$$ &No, skip this change'
1116 1117 b'$$ &Edit this change manually'
1117 1118 b'$$ &Skip remaining changes to this file'
1118 1119 b'$$ Apply remaining changes to this &file'
1119 1120 b'$$ &Done, skip remaining changes and files'
1120 1121 b'$$ Apply &all changes to all remaining files'
1121 1122 b'$$ &Quit, applying no changes'
1122 1123 b'$$ &? (display help)'
1123 1124 ),
1124 1125 b'discard': _(
1125 1126 b'[Ynesfdaq?]'
1126 1127 b'$$ &Yes, discard this change'
1127 1128 b'$$ &No, skip this change'
1128 1129 b'$$ &Edit this change manually'
1129 1130 b'$$ &Skip remaining changes to this file'
1130 1131 b'$$ Discard remaining changes to this &file'
1131 1132 b'$$ &Done, skip remaining changes and files'
1132 1133 b'$$ Discard &all changes to all remaining files'
1133 1134 b'$$ &Quit, discarding no changes'
1134 1135 b'$$ &? (display help)'
1135 1136 ),
1136 1137 b'keep': _(
1137 1138 b'[Ynesfdaq?]'
1138 1139 b'$$ &Yes, keep this change'
1139 1140 b'$$ &No, skip this change'
1140 1141 b'$$ &Edit this change manually'
1141 1142 b'$$ &Skip remaining changes to this file'
1142 1143 b'$$ Keep remaining changes to this &file'
1143 1144 b'$$ &Done, skip remaining changes and files'
1144 1145 b'$$ Keep &all changes to all remaining files'
1145 1146 b'$$ &Quit, keeping all changes'
1146 1147 b'$$ &? (display help)'
1147 1148 ),
1148 1149 b'record': _(
1149 1150 b'[Ynesfdaq?]'
1150 1151 b'$$ &Yes, record this change'
1151 1152 b'$$ &No, skip this change'
1152 1153 b'$$ &Edit this change manually'
1153 1154 b'$$ &Skip remaining changes to this file'
1154 1155 b'$$ Record remaining changes to this &file'
1155 1156 b'$$ &Done, skip remaining changes and files'
1156 1157 b'$$ Record &all changes to all remaining files'
1157 1158 b'$$ &Quit, recording no changes'
1158 1159 b'$$ &? (display help)'
1159 1160 ),
1160 1161 },
1161 1162 }
1162 1163
1163 1164
1164 1165 def filterpatch(ui, headers, match, operation=None):
1165 1166 """Interactively filter patch chunks into applied-only chunks"""
1166 1167 messages = getmessages()
1167 1168
1168 1169 if operation is None:
1169 1170 operation = b'record'
1170 1171
1171 1172 def prompt(skipfile, skipall, query, chunk):
1172 1173 """prompt query, and process base inputs
1173 1174
1174 1175 - y/n for the rest of file
1175 1176 - y/n for the rest
1176 1177 - ? (help)
1177 1178 - q (quit)
1178 1179
1179 1180 Return True/False and possibly updated skipfile and skipall.
1180 1181 """
1181 1182 newpatches = None
1182 1183 if skipall is not None:
1183 1184 return skipall, skipfile, skipall, newpatches
1184 1185 if skipfile is not None:
1185 1186 return skipfile, skipfile, skipall, newpatches
1186 1187 while True:
1187 1188 resps = messages[b'help'][operation]
1188 1189 # IMPORTANT: keep the last line of this prompt short (<40 English
1189 1190 # chars is a good target) because of issue6158.
1190 1191 r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
1191 1192 ui.write(b"\n")
1192 1193 if r == 8: # ?
1193 1194 for c, t in ui.extractchoices(resps)[1]:
1194 1195 ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
1195 1196 continue
1196 1197 elif r == 0: # yes
1197 1198 ret = True
1198 1199 elif r == 1: # no
1199 1200 ret = False
1200 1201 elif r == 2: # Edit patch
1201 1202 if chunk is None:
1202 1203 ui.write(_(b'cannot edit patch for whole file'))
1203 1204 ui.write(b"\n")
1204 1205 continue
1205 1206 if chunk.header.binary():
1206 1207 ui.write(_(b'cannot edit patch for binary file'))
1207 1208 ui.write(b"\n")
1208 1209 continue
1209 1210 # Patch comment based on the Git one (based on comment at end of
1210 1211 # https://mercurial-scm.org/wiki/RecordExtension)
1211 1212 phelp = b'---' + _(
1212 1213 """
1213 1214 To remove '-' lines, make them ' ' lines (context).
1214 1215 To remove '+' lines, delete them.
1215 1216 Lines starting with # will be removed from the patch.
1216 1217
1217 1218 If the patch applies cleanly, the edited hunk will immediately be
1218 1219 added to the record list. If it does not apply cleanly, a rejects
1219 1220 file will be generated: you can use that when you try again. If
1220 1221 all lines of the hunk are removed, then the edit is aborted and
1221 1222 the hunk is left unchanged.
1222 1223 """
1223 1224 )
1224 1225 (patchfd, patchfn) = pycompat.mkstemp(
1225 1226 prefix=b"hg-editor-", suffix=b".diff"
1226 1227 )
1227 1228 ncpatchfp = None
1228 1229 try:
1229 1230 # Write the initial patch
1230 1231 f = util.nativeeolwriter(os.fdopen(patchfd, 'wb'))
1231 1232 chunk.header.write(f)
1232 1233 chunk.write(f)
1233 1234 f.write(
1234 1235 b''.join(
1235 1236 [b'# ' + i + b'\n' for i in phelp.splitlines()]
1236 1237 )
1237 1238 )
1238 1239 f.close()
1239 1240 # Start the editor and wait for it to complete
1240 1241 editor = ui.geteditor()
1241 1242 ret = ui.system(
1242 1243 b"%s \"%s\"" % (editor, patchfn),
1243 1244 environ={b'HGUSER': ui.username()},
1244 1245 blockedtag=b'filterpatch',
1245 1246 )
1246 1247 if ret != 0:
1247 1248 ui.warn(_(b"editor exited with exit code %d\n") % ret)
1248 1249 continue
1249 1250 # Remove comment lines
1250 1251 patchfp = open(patchfn, 'rb')
1251 1252 ncpatchfp = stringio()
1252 1253 for line in util.iterfile(patchfp):
1253 1254 line = util.fromnativeeol(line)
1254 1255 if not line.startswith(b'#'):
1255 1256 ncpatchfp.write(line)
1256 1257 patchfp.close()
1257 1258 ncpatchfp.seek(0)
1258 1259 newpatches = parsepatch(ncpatchfp)
1259 1260 finally:
1260 1261 os.unlink(patchfn)
1261 1262 del ncpatchfp
1262 1263 # Signal that the chunk shouldn't be applied as-is, but
1263 1264 # provide the new patch to be used instead.
1264 1265 ret = False
1265 1266 elif r == 3: # Skip
1266 1267 ret = skipfile = False
1267 1268 elif r == 4: # file (Record remaining)
1268 1269 ret = skipfile = True
1269 1270 elif r == 5: # done, skip remaining
1270 1271 ret = skipall = False
1271 1272 elif r == 6: # all
1272 1273 ret = skipall = True
1273 1274 elif r == 7: # quit
1274 1275 raise error.Abort(_(b'user quit'))
1275 1276 return ret, skipfile, skipall, newpatches
1276 1277
1277 1278 seen = set()
1278 1279 applied = {} # 'filename' -> [] of chunks
1279 1280 skipfile, skipall = None, None
1280 1281 pos, total = 1, sum(len(h.hunks) for h in headers)
1281 1282 for h in headers:
1282 1283 pos += len(h.hunks)
1283 1284 skipfile = None
1284 1285 fixoffset = 0
1285 1286 hdr = b''.join(h.header)
1286 1287 if hdr in seen:
1287 1288 continue
1288 1289 seen.add(hdr)
1289 1290 if skipall is None:
1290 1291 h.pretty(ui)
1291 1292 files = h.files()
1292 1293 msg = _(b'examine changes to %s?') % _(b' and ').join(
1293 1294 b"'%s'" % f for f in files
1294 1295 )
1295 1296 if all(match.exact(f) for f in files):
1296 1297 r, skipall, np = True, None, None
1297 1298 else:
1298 1299 r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
1299 1300 if not r:
1300 1301 continue
1301 1302 applied[h.filename()] = [h]
1302 1303 if h.allhunks():
1303 1304 applied[h.filename()] += h.hunks
1304 1305 continue
1305 1306 for i, chunk in enumerate(h.hunks):
1306 1307 if skipfile is None and skipall is None:
1307 1308 chunk.pretty(ui)
1308 1309 if total == 1:
1309 1310 msg = messages[b'single'][operation] % chunk.filename()
1310 1311 else:
1311 1312 idx = pos - len(h.hunks) + i
1312 1313 msg = messages[b'multiple'][operation] % (
1313 1314 idx,
1314 1315 total,
1315 1316 chunk.filename(),
1316 1317 )
1317 1318 r, skipfile, skipall, newpatches = prompt(
1318 1319 skipfile, skipall, msg, chunk
1319 1320 )
1320 1321 if r:
1321 1322 if fixoffset:
1322 1323 chunk = copy.copy(chunk)
1323 1324 chunk.toline += fixoffset
1324 1325 applied[chunk.filename()].append(chunk)
1325 1326 elif newpatches is not None:
1326 1327 for newpatch in newpatches:
1327 1328 for newhunk in newpatch.hunks:
1328 1329 if fixoffset:
1329 1330 newhunk.toline += fixoffset
1330 1331 applied[newhunk.filename()].append(newhunk)
1331 1332 else:
1332 1333 fixoffset += chunk.removed - chunk.added
1333 1334 return (
1334 1335 sum(
1335 1336 [
1336 1337 h
1337 1338 for h in pycompat.itervalues(applied)
1338 1339 if h[0].special() or len(h) > 1
1339 1340 ],
1340 1341 [],
1341 1342 ),
1342 1343 {},
1343 1344 )
1344 1345
1345 1346
1346 1347 class hunk(object):
1347 1348 def __init__(self, desc, num, lr, context):
1348 1349 self.number = num
1349 1350 self.desc = desc
1350 1351 self.hunk = [desc]
1351 1352 self.a = []
1352 1353 self.b = []
1353 1354 self.starta = self.lena = None
1354 1355 self.startb = self.lenb = None
1355 1356 if lr is not None:
1356 1357 if context:
1357 1358 self.read_context_hunk(lr)
1358 1359 else:
1359 1360 self.read_unified_hunk(lr)
1360 1361
1361 1362 def getnormalized(self):
1362 1363 """Return a copy with line endings normalized to LF."""
1363 1364
1364 1365 def normalize(lines):
1365 1366 nlines = []
1366 1367 for line in lines:
1367 1368 if line.endswith(b'\r\n'):
1368 1369 line = line[:-2] + b'\n'
1369 1370 nlines.append(line)
1370 1371 return nlines
1371 1372
1372 1373 # Dummy object, it is rebuilt manually
1373 1374 nh = hunk(self.desc, self.number, None, None)
1374 1375 nh.number = self.number
1375 1376 nh.desc = self.desc
1376 1377 nh.hunk = self.hunk
1377 1378 nh.a = normalize(self.a)
1378 1379 nh.b = normalize(self.b)
1379 1380 nh.starta = self.starta
1380 1381 nh.startb = self.startb
1381 1382 nh.lena = self.lena
1382 1383 nh.lenb = self.lenb
1383 1384 return nh
1384 1385
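    # Illustrative sketch (an addition, not in the original source):
    # normalization only rewrites a trailing CRLF; LF-only lines pass
    # through untouched. Assuming a parsed hunk `h`:
    #
    # >>> h.a = [b'context\r\n', b'-old\n']
    # >>> nh = h.getnormalized()
    # ... nh.a is now [b'context\n', b'-old\n']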
1385 1386 def read_unified_hunk(self, lr):
1386 1387 m = unidesc.match(self.desc)
1387 1388 if not m:
1388 1389 raise PatchError(_(b"bad hunk #%d") % self.number)
1389 1390 self.starta, self.lena, self.startb, self.lenb = m.groups()
1390 1391 if self.lena is None:
1391 1392 self.lena = 1
1392 1393 else:
1393 1394 self.lena = int(self.lena)
1394 1395 if self.lenb is None:
1395 1396 self.lenb = 1
1396 1397 else:
1397 1398 self.lenb = int(self.lenb)
1398 1399 self.starta = int(self.starta)
1399 1400 self.startb = int(self.startb)
1400 1401 try:
1401 1402 diffhelper.addlines(
1402 1403 lr, self.hunk, self.lena, self.lenb, self.a, self.b
1403 1404 )
1404 1405 except error.ParseError as e:
1405 1406 raise PatchError(_(b"bad hunk #%d: %s") % (self.number, e))
1406 1407 # if we hit eof before finishing out the hunk, the last line will
1407 1408         # be zero length. Let's try to fix it up.
1408 1409 while len(self.hunk[-1]) == 0:
1409 1410 del self.hunk[-1]
1410 1411 del self.a[-1]
1411 1412 del self.b[-1]
1412 1413 self.lena -= 1
1413 1414 self.lenb -= 1
1414 1415 self._fixnewline(lr)
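    # Header-parsing sketch (an addition, not in the original source;
    # unidesc is assumed to match the standard unified range line):
    #
    #   b'@@ -1,7 +1,8 @@'  ->  starta=1, lena=7, startb=1, lenb=8
    #   b'@@ -3 +3 @@'      ->  starta=3, lena=1, startb=3, lenb=1
    #                           (omitted lengths default to 1, per the
    #                           code above)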
1415 1416
1416 1417 def read_context_hunk(self, lr):
1417 1418 self.desc = lr.readline()
1418 1419 m = contextdesc.match(self.desc)
1419 1420 if not m:
1420 1421 raise PatchError(_(b"bad hunk #%d") % self.number)
1421 1422 self.starta, aend = m.groups()
1422 1423 self.starta = int(self.starta)
1423 1424 if aend is None:
1424 1425 aend = self.starta
1425 1426 self.lena = int(aend) - self.starta
1426 1427 if self.starta:
1427 1428 self.lena += 1
1428 1429 for x in pycompat.xrange(self.lena):
1429 1430 l = lr.readline()
1430 1431 if l.startswith(b'---'):
1431 1432 # lines addition, old block is empty
1432 1433 lr.push(l)
1433 1434 break
1434 1435 s = l[2:]
1435 1436 if l.startswith(b'- ') or l.startswith(b'! '):
1436 1437 u = b'-' + s
1437 1438 elif l.startswith(b' '):
1438 1439 u = b' ' + s
1439 1440 else:
1440 1441 raise PatchError(
1441 1442 _(b"bad hunk #%d old text line %d") % (self.number, x)
1442 1443 )
1443 1444 self.a.append(u)
1444 1445 self.hunk.append(u)
1445 1446
1446 1447 l = lr.readline()
1447 1448 if l.startswith(br'\ '):
1448 1449 s = self.a[-1][:-1]
1449 1450 self.a[-1] = s
1450 1451 self.hunk[-1] = s
1451 1452 l = lr.readline()
1452 1453 m = contextdesc.match(l)
1453 1454 if not m:
1454 1455 raise PatchError(_(b"bad hunk #%d") % self.number)
1455 1456 self.startb, bend = m.groups()
1456 1457 self.startb = int(self.startb)
1457 1458 if bend is None:
1458 1459 bend = self.startb
1459 1460 self.lenb = int(bend) - self.startb
1460 1461 if self.startb:
1461 1462 self.lenb += 1
1462 1463 hunki = 1
1463 1464 for x in pycompat.xrange(self.lenb):
1464 1465 l = lr.readline()
1465 1466 if l.startswith(br'\ '):
1466 1467 # XXX: the only way to hit this is with an invalid line range.
1467 1468 # The no-eol marker is not counted in the line range, but I
1468 1469                 # guess some diff(1) implementations behave differently.
1469 1470 s = self.b[-1][:-1]
1470 1471 self.b[-1] = s
1471 1472 self.hunk[hunki - 1] = s
1472 1473 continue
1473 1474 if not l:
1474 1475 # line deletions, new block is empty and we hit EOF
1475 1476 lr.push(l)
1476 1477 break
1477 1478 s = l[2:]
1478 1479 if l.startswith(b'+ ') or l.startswith(b'! '):
1479 1480 u = b'+' + s
1480 1481 elif l.startswith(b' '):
1481 1482 u = b' ' + s
1482 1483 elif len(self.b) == 0:
1483 1484 # line deletions, new block is empty
1484 1485 lr.push(l)
1485 1486 break
1486 1487 else:
1487 1488 raise PatchError(
1488 1489                     _(b"bad hunk #%d new text line %d") % (self.number, x)
1489 1490 )
1490 1491 self.b.append(s)
1491 1492 while True:
1492 1493 if hunki >= len(self.hunk):
1493 1494 h = b""
1494 1495 else:
1495 1496 h = self.hunk[hunki]
1496 1497 hunki += 1
1497 1498 if h == u:
1498 1499 break
1499 1500 elif h.startswith(b'-'):
1500 1501 continue
1501 1502 else:
1502 1503 self.hunk.insert(hunki - 1, u)
1503 1504 break
1504 1505
1505 1506 if not self.a:
1506 1507 # this happens when lines were only added to the hunk
1507 1508 for x in self.hunk:
1508 1509 if x.startswith(b'-') or x.startswith(b' '):
1509 1510 self.a.append(x)
1510 1511 if not self.b:
1511 1512 # this happens when lines were only deleted from the hunk
1512 1513 for x in self.hunk:
1513 1514 if x.startswith(b'+') or x.startswith(b' '):
1514 1515 self.b.append(x[1:])
1515 1516 # @@ -start,len +start,len @@
1516 1517 self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
1517 1518 self.starta,
1518 1519 self.lena,
1519 1520 self.startb,
1520 1521 self.lenb,
1521 1522 )
1522 1523 self.hunk[0] = self.desc
1523 1524 self._fixnewline(lr)
1524 1525
1525 1526 def _fixnewline(self, lr):
1526 1527 l = lr.readline()
1527 1528 if l.startswith(br'\ '):
1528 1529 diffhelper.fixnewline(self.hunk, self.a, self.b)
1529 1530 else:
1530 1531 lr.push(l)
1531 1532
1532 1533 def complete(self):
1533 1534 return len(self.a) == self.lena and len(self.b) == self.lenb
1534 1535
1535 1536 def _fuzzit(self, old, new, fuzz, toponly):
1536 1537         # this removes context lines from the top and bottom of the 'old'
1537 1538         # and 'new' lists. It checks the hunk to make sure only context
1538 1539         # lines are removed, and then returns the shortened lists of lines.
1539 1540 fuzz = min(fuzz, len(old))
1540 1541 if fuzz:
1541 1542 top = 0
1542 1543 bot = 0
1543 1544 hlen = len(self.hunk)
1544 1545 for x in pycompat.xrange(hlen - 1):
1545 1546 # the hunk starts with the @@ line, so use x+1
1546 1547 if self.hunk[x + 1].startswith(b' '):
1547 1548 top += 1
1548 1549 else:
1549 1550 break
1550 1551 if not toponly:
1551 1552 for x in pycompat.xrange(hlen - 1):
1552 1553 if self.hunk[hlen - bot - 1].startswith(b' '):
1553 1554 bot += 1
1554 1555 else:
1555 1556 break
1556 1557
1557 1558 bot = min(fuzz, bot)
1558 1559 top = min(fuzz, top)
1559 1560 return old[top : len(old) - bot], new[top : len(new) - bot], top
1560 1561 return old, new, 0
1561 1562
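    # Worked sketch (an addition, not in the original source): for a hunk
    # body [' c1', ' c2', '-old', '+new', ' c3'] and fuzz=2, the scans
    # above find top=2 leading and bot=1 trailing context lines, so the
    # lists are trimmed to old[2:-1] and new[2:-1], and top=2 is returned
    # for fuzzit() to shift the match window.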
1562 1563 def fuzzit(self, fuzz, toponly):
1563 1564 old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
1564 1565 oldstart = self.starta + top
1565 1566 newstart = self.startb + top
1566 1567 # zero length hunk ranges already have their start decremented
1567 1568 if self.lena and oldstart > 0:
1568 1569 oldstart -= 1
1569 1570 if self.lenb and newstart > 0:
1570 1571 newstart -= 1
1571 1572 return old, oldstart, new, newstart
1572 1573
1573 1574
1574 1575 class binhunk(object):
1575 1576 """A binary patch file."""
1576 1577
1577 1578 def __init__(self, lr, fname):
1578 1579 self.text = None
1579 1580 self.delta = False
1580 1581 self.hunk = [b'GIT binary patch\n']
1581 1582 self._fname = fname
1582 1583 self._read(lr)
1583 1584
1584 1585 def complete(self):
1585 1586 return self.text is not None
1586 1587
1587 1588 def new(self, lines):
1588 1589 if self.delta:
1589 1590 return [applybindelta(self.text, b''.join(lines))]
1590 1591 return [self.text]
1591 1592
1592 1593 def _read(self, lr):
1593 1594 def getline(lr, hunk):
1594 1595 l = lr.readline()
1595 1596 hunk.append(l)
1596 1597 return l.rstrip(b'\r\n')
1597 1598
1598 1599 while True:
1599 1600 line = getline(lr, self.hunk)
1600 1601 if not line:
1601 1602 raise PatchError(
1602 1603 _(b'could not extract "%s" binary data') % self._fname
1603 1604 )
1604 1605 if line.startswith(b'literal '):
1605 1606 size = int(line[8:].rstrip())
1606 1607 break
1607 1608 if line.startswith(b'delta '):
1608 1609 size = int(line[6:].rstrip())
1609 1610 self.delta = True
1610 1611 break
1611 1612 dec = []
1612 1613 line = getline(lr, self.hunk)
1613 1614 while len(line) > 1:
1614 1615 l = line[0:1]
1615 1616             if b'A' <= l <= b'Z':
1616 1617 l = ord(l) - ord(b'A') + 1
1617 1618 else:
1618 1619 l = ord(l) - ord(b'a') + 27
1619 1620 try:
1620 1621 dec.append(util.b85decode(line[1:])[:l])
1621 1622 except ValueError as e:
1622 1623 raise PatchError(
1623 1624 _(b'could not decode "%s" binary patch: %s')
1624 1625 % (self._fname, stringutil.forcebytestr(e))
1625 1626 )
1626 1627 line = getline(lr, self.hunk)
1627 1628 text = zlib.decompress(b''.join(dec))
1628 1629 if len(text) != size:
1629 1630 raise PatchError(
1630 1631 _(b'"%s" length is %d bytes, should be %d')
1631 1632 % (self._fname, len(text), size)
1632 1633 )
1633 1634 self.text = text
1634 1635
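# Encoding sketch for binhunk (an addition, not in the original source):
# each data line of a 'GIT binary patch' hunk starts with one length
# character -- 'A'..'Z' encode 1..26 decoded bytes, 'a'..'z' encode
# 27..52 -- followed by base85 data; the concatenated, zlib-decompressed
# payload must match the announced 'literal <size>'/'delta <size>' length.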
1635 1636
1636 1637 def parsefilename(str):
1637 1638 # --- filename \t|space stuff
1638 1639 s = str[4:].rstrip(b'\r\n')
1639 1640 i = s.find(b'\t')
1640 1641 if i < 0:
1641 1642 i = s.find(b' ')
1642 1643 if i < 0:
1643 1644 return s
1644 1645 return s[:i]
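# Illustrative examples (an addition, not in the original source):
#
#   parsefilename(b'--- a/foo.c\t2020-01-01 00:00:00')  ->  b'a/foo.c'
#   parsefilename(b'+++ b/foo.c\n')                     ->  b'b/foo.c'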
1645 1646
1646 1647
1647 1648 def reversehunks(hunks):
1648 1649 '''reverse the signs in the hunks given as argument
1649 1650
1650 1651 This function operates on hunks coming out of patch.filterpatch, that is
1651 1652 a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:
1652 1653
1653 1654 >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
1654 1655 ... --- a/folder1/g
1655 1656 ... +++ b/folder1/g
1656 1657 ... @@ -1,7 +1,7 @@
1657 1658 ... +firstline
1658 1659 ... c
1659 1660 ... 1
1660 1661 ... 2
1661 1662 ... + 3
1662 1663 ... -4
1663 1664 ... 5
1664 1665 ... d
1665 1666 ... +lastline"""
1666 1667 >>> hunks = parsepatch([rawpatch])
1667 1668 >>> hunkscomingfromfilterpatch = []
1668 1669 >>> for h in hunks:
1669 1670 ... hunkscomingfromfilterpatch.append(h)
1670 1671 ... hunkscomingfromfilterpatch.extend(h.hunks)
1671 1672
1672 1673 >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
1673 1674 >>> from . import util
1674 1675 >>> fp = util.stringio()
1675 1676 >>> for c in reversedhunks:
1676 1677 ... c.write(fp)
1677 1678 >>> fp.seek(0) or None
1678 1679 >>> reversedpatch = fp.read()
1679 1680 >>> print(pycompat.sysstr(reversedpatch))
1680 1681 diff --git a/folder1/g b/folder1/g
1681 1682 --- a/folder1/g
1682 1683 +++ b/folder1/g
1683 1684 @@ -1,4 +1,3 @@
1684 1685 -firstline
1685 1686 c
1686 1687 1
1687 1688 2
1688 1689 @@ -2,6 +1,6 @@
1689 1690 c
1690 1691 1
1691 1692 2
1692 1693 - 3
1693 1694 +4
1694 1695 5
1695 1696 d
1696 1697 @@ -6,3 +5,2 @@
1697 1698 5
1698 1699 d
1699 1700 -lastline
1700 1701
1701 1702 '''
1702 1703
1703 1704 newhunks = []
1704 1705 for c in hunks:
1705 1706 if util.safehasattr(c, b'reversehunk'):
1706 1707 c = c.reversehunk()
1707 1708 newhunks.append(c)
1708 1709 return newhunks
1709 1710
1710 1711
1711 1712 def parsepatch(originalchunks, maxcontext=None):
1712 1713 """patch -> [] of headers -> [] of hunks
1713 1714
1714 1715 If maxcontext is not None, trim context lines if necessary.
1715 1716
1716 1717 >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
1717 1718 ... --- a/folder1/g
1718 1719 ... +++ b/folder1/g
1719 1720 ... @@ -1,8 +1,10 @@
1720 1721 ... 1
1721 1722 ... 2
1722 1723 ... -3
1723 1724 ... 4
1724 1725 ... 5
1725 1726 ... 6
1726 1727 ... +6.1
1727 1728 ... +6.2
1728 1729 ... 7
1729 1730 ... 8
1730 1731 ... +9'''
1731 1732 >>> out = util.stringio()
1732 1733 >>> headers = parsepatch([rawpatch], maxcontext=1)
1733 1734 >>> for header in headers:
1734 1735 ... header.write(out)
1735 1736 ... for hunk in header.hunks:
1736 1737 ... hunk.write(out)
1737 1738 >>> print(pycompat.sysstr(out.getvalue()))
1738 1739 diff --git a/folder1/g b/folder1/g
1739 1740 --- a/folder1/g
1740 1741 +++ b/folder1/g
1741 1742 @@ -2,3 +2,2 @@
1742 1743 2
1743 1744 -3
1744 1745 4
1745 1746 @@ -6,2 +5,4 @@
1746 1747 6
1747 1748 +6.1
1748 1749 +6.2
1749 1750 7
1750 1751 @@ -8,1 +9,2 @@
1751 1752 8
1752 1753 +9
1753 1754 """
1754 1755
1755 1756 class parser(object):
1756 1757 """patch parsing state machine"""
1757 1758
1758 1759 def __init__(self):
1759 1760 self.fromline = 0
1760 1761 self.toline = 0
1761 1762 self.proc = b''
1762 1763 self.header = None
1763 1764 self.context = []
1764 1765 self.before = []
1765 1766 self.hunk = []
1766 1767 self.headers = []
1767 1768
1768 1769 def addrange(self, limits):
1769 1770 self.addcontext([])
1770 1771 fromstart, fromend, tostart, toend, proc = limits
1771 1772 self.fromline = int(fromstart)
1772 1773 self.toline = int(tostart)
1773 1774 self.proc = proc
1774 1775
1775 1776 def addcontext(self, context):
1776 1777 if self.hunk:
1777 1778 h = recordhunk(
1778 1779 self.header,
1779 1780 self.fromline,
1780 1781 self.toline,
1781 1782 self.proc,
1782 1783 self.before,
1783 1784 self.hunk,
1784 1785 context,
1785 1786 maxcontext,
1786 1787 )
1787 1788 self.header.hunks.append(h)
1788 1789 self.fromline += len(self.before) + h.removed
1789 1790 self.toline += len(self.before) + h.added
1790 1791 self.before = []
1791 1792 self.hunk = []
1792 1793 self.context = context
1793 1794
1794 1795 def addhunk(self, hunk):
1795 1796 if self.context:
1796 1797 self.before = self.context
1797 1798 self.context = []
1798 1799 if self.hunk:
1799 1800 self.addcontext([])
1800 1801 self.hunk = hunk
1801 1802
1802 1803 def newfile(self, hdr):
1803 1804 self.addcontext([])
1804 1805 h = header(hdr)
1805 1806 self.headers.append(h)
1806 1807 self.header = h
1807 1808
1808 1809 def addother(self, line):
1809 1810 pass # 'other' lines are ignored
1810 1811
1811 1812 def finished(self):
1812 1813 self.addcontext([])
1813 1814 return self.headers
1814 1815
1815 1816 transitions = {
1816 1817 b'file': {
1817 1818 b'context': addcontext,
1818 1819 b'file': newfile,
1819 1820 b'hunk': addhunk,
1820 1821 b'range': addrange,
1821 1822 },
1822 1823 b'context': {
1823 1824 b'file': newfile,
1824 1825 b'hunk': addhunk,
1825 1826 b'range': addrange,
1826 1827 b'other': addother,
1827 1828 },
1828 1829 b'hunk': {
1829 1830 b'context': addcontext,
1830 1831 b'file': newfile,
1831 1832 b'range': addrange,
1832 1833 },
1833 1834 b'range': {b'context': addcontext, b'hunk': addhunk},
1834 1835 b'other': {b'other': addother},
1835 1836 }
1836 1837
1837 1838 p = parser()
1838 1839 fp = stringio()
1839 1840 fp.write(b''.join(originalchunks))
1840 1841 fp.seek(0)
1841 1842
1842 1843 state = b'context'
1843 1844 for newstate, data in scanpatch(fp):
1844 1845 try:
1845 1846 p.transitions[state][newstate](p, data)
1846 1847 except KeyError:
1847 1848 raise PatchError(
1848 1849 b'unhandled transition: %s -> %s' % (state, newstate)
1849 1850 )
1850 1851 state = newstate
1851 1852 del fp
1852 1853 return p.finished()
1853 1854
1854 1855
1855 1856 def pathtransform(path, strip, prefix):
1856 1857 '''turn a path from a patch into a path suitable for the repository
1857 1858
1858 1859 prefix, if not empty, is expected to be normalized with a / at the end.
1859 1860
1860 1861 Returns (stripped components, path in repository).
1861 1862
1862 1863 >>> pathtransform(b'a/b/c', 0, b'')
1863 1864 ('', 'a/b/c')
1864 1865 >>> pathtransform(b' a/b/c ', 0, b'')
1865 1866 ('', ' a/b/c')
1866 1867 >>> pathtransform(b' a/b/c ', 2, b'')
1867 1868 ('a/b/', 'c')
1868 1869 >>> pathtransform(b'a/b/c', 0, b'd/e/')
1869 1870 ('', 'd/e/a/b/c')
1870 1871 >>> pathtransform(b' a//b/c ', 2, b'd/e/')
1871 1872 ('a//b/', 'd/e/c')
1872 1873 >>> pathtransform(b'a/b/c', 3, b'')
1873 1874 Traceback (most recent call last):
1874 1875 PatchError: unable to strip away 1 of 3 dirs from a/b/c
1875 1876 '''
1876 1877 pathlen = len(path)
1877 1878 i = 0
1878 1879 if strip == 0:
1879 1880 return b'', prefix + path.rstrip()
1880 1881 count = strip
1881 1882 while count > 0:
1882 1883 i = path.find(b'/', i)
1883 1884 if i == -1:
1884 1885 raise PatchError(
1885 1886 _(b"unable to strip away %d of %d dirs from %s")
1886 1887 % (count, strip, path)
1887 1888 )
1888 1889 i += 1
1889 1890 # consume '//' in the path
1890 1891 while i < pathlen - 1 and path[i : i + 1] == b'/':
1891 1892 i += 1
1892 1893 count -= 1
1893 1894 return path[:i].lstrip(), prefix + path[i:].rstrip()
1894 1895
1895 1896
1896 1897 def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
1897 1898 nulla = afile_orig == b"/dev/null"
1898 1899 nullb = bfile_orig == b"/dev/null"
1899 1900 create = nulla and hunk.starta == 0 and hunk.lena == 0
1900 1901 remove = nullb and hunk.startb == 0 and hunk.lenb == 0
1901 1902 abase, afile = pathtransform(afile_orig, strip, prefix)
1902 1903 gooda = not nulla and backend.exists(afile)
1903 1904 bbase, bfile = pathtransform(bfile_orig, strip, prefix)
1904 1905 if afile == bfile:
1905 1906 goodb = gooda
1906 1907 else:
1907 1908 goodb = not nullb and backend.exists(bfile)
1908 1909 missing = not goodb and not gooda and not create
1909 1910
1910 1911 # some diff programs apparently produce patches where the afile is
1911 1912 # not /dev/null, but afile starts with bfile
1912 1913 abasedir = afile[: afile.rfind(b'/') + 1]
1913 1914 bbasedir = bfile[: bfile.rfind(b'/') + 1]
1914 1915 if (
1915 1916 missing
1916 1917 and abasedir == bbasedir
1917 1918 and afile.startswith(bfile)
1918 1919 and hunk.starta == 0
1919 1920 and hunk.lena == 0
1920 1921 ):
1921 1922 create = True
1922 1923 missing = False
1923 1924
1924 1925 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
1925 1926 # diff is between a file and its backup. In this case, the original
1926 1927 # file should be patched (see original mpatch code).
1927 1928 isbackup = abase == bbase and bfile.startswith(afile)
1928 1929 fname = None
1929 1930 if not missing:
1930 1931 if gooda and goodb:
1931 1932 if isbackup:
1932 1933 fname = afile
1933 1934 else:
1934 1935 fname = bfile
1935 1936 elif gooda:
1936 1937 fname = afile
1937 1938
1938 1939 if not fname:
1939 1940 if not nullb:
1940 1941 if isbackup:
1941 1942 fname = afile
1942 1943 else:
1943 1944 fname = bfile
1944 1945 elif not nulla:
1945 1946 fname = afile
1946 1947 else:
1947 1948 raise PatchError(_(b"undefined source and destination files"))
1948 1949
1949 1950 gp = patchmeta(fname)
1950 1951 if create:
1951 1952 gp.op = b'ADD'
1952 1953 elif remove:
1953 1954 gp.op = b'DELETE'
1954 1955 return gp
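# Detection sketch (an addition, not in the original source): a creation
# patch has afile == /dev/null and a hunk header of @@ -0,0 +1,N @@, so
# nulla, hunk.starta == 0 and hunk.lena == 0 make create True and gp.op
# becomes b'ADD'; symmetrically, bfile == /dev/null with @@ -1,N +0,0 @@
# yields b'DELETE'.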
1955 1956
1956 1957
1957 1958 def scanpatch(fp):
1958 1959 """like patch.iterhunks, but yield different events
1959 1960
1960 1961 - ('file', [header_lines + fromfile + tofile])
1961 1962 - ('context', [context_lines])
1962 1963 - ('hunk', [hunk_lines])
1963 1964 - ('range', (-start,len, +start,len, proc))
1964 1965 """
1965 1966 lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
1966 1967 lr = linereader(fp)
1967 1968
1968 1969 def scanwhile(first, p):
1969 1970 """scan lr while predicate holds"""
1970 1971 lines = [first]
1971 1972 for line in iter(lr.readline, b''):
1972 1973 if p(line):
1973 1974 lines.append(line)
1974 1975 else:
1975 1976 lr.push(line)
1976 1977 break
1977 1978 return lines
1978 1979
1979 1980 for line in iter(lr.readline, b''):
1980 1981 if line.startswith(b'diff --git a/') or line.startswith(b'diff -r '):
1981 1982
1982 1983 def notheader(line):
1983 1984 s = line.split(None, 1)
1984 1985 return not s or s[0] not in (b'---', b'diff')
1985 1986
1986 1987 header = scanwhile(line, notheader)
1987 1988 fromfile = lr.readline()
1988 1989 if fromfile.startswith(b'---'):
1989 1990 tofile = lr.readline()
1990 1991 header += [fromfile, tofile]
1991 1992 else:
1992 1993 lr.push(fromfile)
1993 1994 yield b'file', header
1994 1995 elif line.startswith(b' '):
1995 1996 cs = (b' ', b'\\')
1996 1997 yield b'context', scanwhile(line, lambda l: l.startswith(cs))
1997 1998 elif line.startswith((b'-', b'+')):
1998 1999 cs = (b'-', b'+', b'\\')
1999 2000 yield b'hunk', scanwhile(line, lambda l: l.startswith(cs))
2000 2001 else:
2001 2002 m = lines_re.match(line)
2002 2003 if m:
2003 2004 yield b'range', m.groups()
2004 2005 else:
2005 2006 yield b'other', line
2006 2007
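# Event-stream sketch (an addition, not in the original source): for the
# minimal git diff
#
#   diff --git a/f b/f
#   --- a/f
#   +++ b/f
#   @@ -1,1 +1,1 @@
#   -old
#   +new
#
# scanpatch() yields approximately:
#
#   (b'file',  [header lines, including the ---/+++ pair])
#   (b'range', (b'1', b'1', b'1', b'1', b''))
#   (b'hunk',  [b'-old\n', b'+new\n'])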
2007 2008
2008 2009 def scangitpatch(lr, firstline):
2009 2010 """
2010 2011 Git patches can emit:
2011 2012 - rename a to b
2012 2013 - change b
2013 2014 - copy a to c
2014 2015 - change c
2015 2016
2016 2017     We cannot apply this sequence as-is: the renamed 'a' could not be
2017 2018     found because it would have been renamed already. And we cannot copy
2018 2019 from 'b' instead because 'b' would have been changed already. So
2019 2020 we scan the git patch for copy and rename commands so we can
2020 2021 perform the copies ahead of time.
2021 2022 """
2022 2023 pos = 0
2023 2024 try:
2024 2025 pos = lr.fp.tell()
2025 2026 fp = lr.fp
2026 2027 except IOError:
2027 2028 fp = stringio(lr.fp.read())
2028 2029 gitlr = linereader(fp)
2029 2030 gitlr.push(firstline)
2030 2031 gitpatches = readgitpatch(gitlr)
2031 2032 fp.seek(pos)
2032 2033 return gitpatches
2033 2034
2034 2035
2035 2036 def iterhunks(fp):
2036 2037 """Read a patch and yield the following events:
2037 2038 - ("file", afile, bfile, firsthunk): select a new target file.
2038 2039 - ("hunk", hunk): a new hunk is ready to be applied, follows a
2039 2040 "file" event.
2040 2041 - ("git", gitchanges): current diff is in git format, gitchanges
2041 2042 maps filenames to gitpatch records. Unique event.
2042 2043 """
2043 2044 afile = b""
2044 2045 bfile = b""
2045 2046 state = None
2046 2047 hunknum = 0
2047 2048 emitfile = newfile = False
2048 2049 gitpatches = None
2049 2050
2050 2051 # our states
2051 2052 BFILE = 1
2052 2053 context = None
2053 2054 lr = linereader(fp)
2054 2055
2055 2056 for x in iter(lr.readline, b''):
2056 2057 if state == BFILE and (
2057 2058 (not context and x.startswith(b'@'))
2058 2059 or (context is not False and x.startswith(b'***************'))
2059 2060 or x.startswith(b'GIT binary patch')
2060 2061 ):
2061 2062 gp = None
2062 2063 if gitpatches and gitpatches[-1].ispatching(afile, bfile):
2063 2064 gp = gitpatches.pop()
2064 2065 if x.startswith(b'GIT binary patch'):
2065 2066 h = binhunk(lr, gp.path)
2066 2067 else:
2067 2068 if context is None and x.startswith(b'***************'):
2068 2069 context = True
2069 2070 h = hunk(x, hunknum + 1, lr, context)
2070 2071 hunknum += 1
2071 2072 if emitfile:
2072 2073 emitfile = False
2073 2074 yield b'file', (afile, bfile, h, gp and gp.copy() or None)
2074 2075 yield b'hunk', h
2075 2076 elif x.startswith(b'diff --git a/'):
2076 2077 m = gitre.match(x.rstrip(b' \r\n'))
2077 2078 if not m:
2078 2079 continue
2079 2080 if gitpatches is None:
2080 2081 # scan whole input for git metadata
2081 2082 gitpatches = scangitpatch(lr, x)
2082 2083 yield b'git', [
2083 2084 g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
2084 2085 ]
2085 2086 gitpatches.reverse()
2086 2087 afile = b'a/' + m.group(1)
2087 2088 bfile = b'b/' + m.group(2)
2088 2089 while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
2089 2090 gp = gitpatches.pop()
2090 2091 yield b'file', (
2091 2092 b'a/' + gp.path,
2092 2093 b'b/' + gp.path,
2093 2094 None,
2094 2095 gp.copy(),
2095 2096 )
2096 2097 if not gitpatches:
2097 2098 raise PatchError(
2098 2099 _(b'failed to synchronize metadata for "%s"') % afile[2:]
2099 2100 )
2100 2101 newfile = True
2101 2102 elif x.startswith(b'---'):
2102 2103 # check for a unified diff
2103 2104 l2 = lr.readline()
2104 2105 if not l2.startswith(b'+++'):
2105 2106 lr.push(l2)
2106 2107 continue
2107 2108 newfile = True
2108 2109 context = False
2109 2110 afile = parsefilename(x)
2110 2111 bfile = parsefilename(l2)
2111 2112 elif x.startswith(b'***'):
2112 2113 # check for a context diff
2113 2114 l2 = lr.readline()
2114 2115 if not l2.startswith(b'---'):
2115 2116 lr.push(l2)
2116 2117 continue
2117 2118 l3 = lr.readline()
2118 2119 lr.push(l3)
2119 2120 if not l3.startswith(b"***************"):
2120 2121 lr.push(l2)
2121 2122 continue
2122 2123 newfile = True
2123 2124 context = True
2124 2125 afile = parsefilename(x)
2125 2126 bfile = parsefilename(l2)
2126 2127
2127 2128 if newfile:
2128 2129 newfile = False
2129 2130 emitfile = True
2130 2131 state = BFILE
2131 2132 hunknum = 0
2132 2133
2133 2134 while gitpatches:
2134 2135 gp = gitpatches.pop()
2135 2136 yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())
2136 2137
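# Consumption sketch (an addition, not in the original source): callers
# drive the event stream roughly like
#
#   for state, values in iterhunks(fp):
#       if state == b'file':
#           afile, bfile, first_hunk, gp = values
#           # select the new target file
#       elif state == b'hunk':
#           # apply `values` to the current file
#           pass
#       elif state == b'git':
#           # pre-copy COPY/RENAME sources
#           pass
#
# which is exactly the shape of _applydiff() below.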
2137 2138
2138 2139 def applybindelta(binchunk, data):
2139 2140 """Apply a binary delta hunk
2140 2141     The algorithm used is the one from git's patch-delta.c
2141 2142 """
2142 2143
2143 2144 def deltahead(binchunk):
2144 2145 i = 0
2145 2146 for c in pycompat.bytestr(binchunk):
2146 2147 i += 1
2147 2148 if not (ord(c) & 0x80):
2148 2149 return i
2149 2150 return i
2150 2151
2151 2152 out = b""
2152 2153 s = deltahead(binchunk)
2153 2154 binchunk = binchunk[s:]
2154 2155 s = deltahead(binchunk)
2155 2156 binchunk = binchunk[s:]
2156 2157 i = 0
2157 2158 while i < len(binchunk):
2158 2159 cmd = ord(binchunk[i : i + 1])
2159 2160 i += 1
2160 2161 if cmd & 0x80:
2161 2162 offset = 0
2162 2163 size = 0
2163 2164 if cmd & 0x01:
2164 2165 offset = ord(binchunk[i : i + 1])
2165 2166 i += 1
2166 2167 if cmd & 0x02:
2167 2168 offset |= ord(binchunk[i : i + 1]) << 8
2168 2169 i += 1
2169 2170 if cmd & 0x04:
2170 2171 offset |= ord(binchunk[i : i + 1]) << 16
2171 2172 i += 1
2172 2173 if cmd & 0x08:
2173 2174 offset |= ord(binchunk[i : i + 1]) << 24
2174 2175 i += 1
2175 2176 if cmd & 0x10:
2176 2177 size = ord(binchunk[i : i + 1])
2177 2178 i += 1
2178 2179 if cmd & 0x20:
2179 2180 size |= ord(binchunk[i : i + 1]) << 8
2180 2181 i += 1
2181 2182 if cmd & 0x40:
2182 2183 size |= ord(binchunk[i : i + 1]) << 16
2183 2184 i += 1
2184 2185 if size == 0:
2185 2186 size = 0x10000
2186 2187 offset_end = offset + size
2187 2188 out += data[offset:offset_end]
2188 2189 elif cmd != 0:
2189 2190 offset_end = i + cmd
2190 2191 out += binchunk[i:offset_end]
2191 2192 i += cmd
2192 2193 else:
2193 2194 raise PatchError(_(b'unexpected delta opcode 0'))
2194 2195 return out
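# Worked examples (an addition, not in the original source). deltahead()
# skips the two leading varints (source and target sizes); a command byte
# with the high bit set copies offset/size bytes from `data` as selected
# by its low bits, while 0x01-0x7f inserts that many literal bytes:
#
#   applybindelta(b'\x01\x03\x03abc', b'x')        ->  b'abc'  (insert 3)
#   applybindelta(b'\x03\x03\x91\x00\x03', b'xyz') ->  b'xyz'  (copy 0:3)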
2195 2196
2196 2197
2197 2198 def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
2198 2199 """Reads a patch from fp and tries to apply it.
2199 2200
2200 2201 Returns 0 for a clean patch, -1 if any rejects were found and 1 if
2201 2202 there was any fuzz.
2202 2203
2203 2204 If 'eolmode' is 'strict', the patch content and patched file are
2204 2205 read in binary mode. Otherwise, line endings are ignored when
2205 2206 patching then normalized according to 'eolmode'.
2206 2207 """
2207 2208 return _applydiff(
2208 2209 ui,
2209 2210 fp,
2210 2211 patchfile,
2211 2212 backend,
2212 2213 store,
2213 2214 strip=strip,
2214 2215 prefix=prefix,
2215 2216 eolmode=eolmode,
2216 2217 )
2217 2218
2218 2219
2219 2220 def _canonprefix(repo, prefix):
2220 2221 if prefix:
2221 2222 prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
2222 2223 if prefix != b'':
2223 2224 prefix += b'/'
2224 2225 return prefix
2225 2226
2226 2227
2227 2228 def _applydiff(
2228 2229 ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
2229 2230 ):
2230 2231 prefix = _canonprefix(backend.repo, prefix)
2231 2232
2232 2233 def pstrip(p):
2233 2234 return pathtransform(p, strip - 1, prefix)[1]
2234 2235
2235 2236 rejects = 0
2236 2237 err = 0
2237 2238 current_file = None
2238 2239
2239 2240 for state, values in iterhunks(fp):
2240 2241 if state == b'hunk':
2241 2242 if not current_file:
2242 2243 continue
2243 2244 ret = current_file.apply(values)
2244 2245 if ret > 0:
2245 2246 err = 1
2246 2247 elif state == b'file':
2247 2248 if current_file:
2248 2249 rejects += current_file.close()
2249 2250 current_file = None
2250 2251 afile, bfile, first_hunk, gp = values
2251 2252 if gp:
2252 2253 gp.path = pstrip(gp.path)
2253 2254 if gp.oldpath:
2254 2255 gp.oldpath = pstrip(gp.oldpath)
2255 2256 else:
2256 2257 gp = makepatchmeta(
2257 2258 backend, afile, bfile, first_hunk, strip, prefix
2258 2259 )
2259 2260 if gp.op == b'RENAME':
2260 2261 backend.unlink(gp.oldpath)
2261 2262 if not first_hunk:
2262 2263 if gp.op == b'DELETE':
2263 2264 backend.unlink(gp.path)
2264 2265 continue
2265 2266 data, mode = None, None
2266 2267 if gp.op in (b'RENAME', b'COPY'):
2267 2268 data, mode = store.getfile(gp.oldpath)[:2]
2268 2269 if data is None:
2269 2270 # This means that the old path does not exist
2270 2271 raise PatchError(
2271 2272 _(b"source file '%s' does not exist") % gp.oldpath
2272 2273 )
2273 2274 if gp.mode:
2274 2275 mode = gp.mode
2275 2276 if gp.op == b'ADD':
2276 2277 # Added files without content have no hunk and
2277 2278 # must be created
2278 2279 data = b''
2279 2280 if data or mode:
2280 2281 if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
2281 2282 gp.path
2282 2283 ):
2283 2284 raise PatchError(
2284 2285 _(
2285 2286 b"cannot create %s: destination "
2286 2287 b"already exists"
2287 2288 )
2288 2289 % gp.path
2289 2290 )
2290 2291 backend.setfile(gp.path, data, mode, gp.oldpath)
2291 2292 continue
2292 2293 try:
2293 2294 current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
2294 2295 except PatchError as inst:
2295 2296 ui.warn(stringutil.forcebytestr(inst) + b'\n')
2296 2297 current_file = None
2297 2298 rejects += 1
2298 2299 continue
2299 2300 elif state == b'git':
2300 2301 for gp in values:
2301 2302 path = pstrip(gp.oldpath)
2302 2303 data, mode = backend.getfile(path)
2303 2304 if data is None:
2304 2305 # The error ignored here will trigger a getfile()
2305 2306 # error in a place more appropriate for error
2306 2307 # handling, and will not interrupt the patching
2307 2308 # process.
2308 2309 pass
2309 2310 else:
2310 2311 store.setfile(path, data, mode)
2311 2312 else:
2312 2313 raise error.Abort(_(b'unsupported parser state: %s') % state)
2313 2314
2314 2315 if current_file:
2315 2316 rejects += current_file.close()
2316 2317
2317 2318 if rejects:
2318 2319 return -1
2319 2320 return err
2320 2321
2321 2322
2322 2323 def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
2323 2324 """use <patcher> to apply <patchname> to the working directory.
2324 2325 returns whether patch was applied with fuzz factor."""
2325 2326
2326 2327 fuzz = False
2327 2328 args = []
2328 2329 cwd = repo.root
2329 2330 if cwd:
2330 2331 args.append(b'-d %s' % procutil.shellquote(cwd))
2331 2332 cmd = b'%s %s -p%d < %s' % (
2332 2333 patcher,
2333 2334 b' '.join(args),
2334 2335 strip,
2335 2336 procutil.shellquote(patchname),
2336 2337 )
2337 2338 ui.debug(b'Using external patch tool: %s\n' % cmd)
2338 2339 fp = procutil.popen(cmd, b'rb')
2339 2340 try:
2340 2341 for line in util.iterfile(fp):
2341 2342 line = line.rstrip()
2342 2343 ui.note(line + b'\n')
2343 2344 if line.startswith(b'patching file '):
2344 2345 pf = util.parsepatchoutput(line)
2345 2346 printed_file = False
2346 2347 files.add(pf)
2347 2348 elif line.find(b'with fuzz') >= 0:
2348 2349 fuzz = True
2349 2350 if not printed_file:
2350 2351 ui.warn(pf + b'\n')
2351 2352 printed_file = True
2352 2353 ui.warn(line + b'\n')
2353 2354 elif line.find(b'saving rejects to file') >= 0:
2354 2355 ui.warn(line + b'\n')
2355 2356 elif line.find(b'FAILED') >= 0:
2356 2357 if not printed_file:
2357 2358 ui.warn(pf + b'\n')
2358 2359 printed_file = True
2359 2360 ui.warn(line + b'\n')
2360 2361 finally:
2361 2362 if files:
2362 2363 scmutil.marktouched(repo, files, similarity)
2363 2364 code = fp.close()
2364 2365 if code:
2365 2366 raise PatchError(
2366 2367 _(b"patch command failed: %s") % procutil.explainexit(code)
2367 2368 )
2368 2369 return fuzz
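# Command sketch (an addition, not in the original source): with GNU
# patch as <patcher>, strip=1 and a repository rooted at /repo, the
# command built above is roughly
#
#   patch -d '/repo' -p1 < 'fix.diff'
#
# whose output is then scanned for 'patching file', 'with fuzz',
# 'saving rejects to file' and 'FAILED' markers.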
2369 2370
2370 2371
2371 2372 def patchbackend(
2372 2373 ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
2373 2374 ):
2374 2375 if files is None:
2375 2376 files = set()
2376 2377 if eolmode is None:
2377 2378 eolmode = ui.config(b'patch', b'eol')
2378 2379 if eolmode.lower() not in eolmodes:
2379 2380 raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)
2380 2381 eolmode = eolmode.lower()
2381 2382
2382 2383 store = filestore()
2383 2384 try:
2384 2385 fp = open(patchobj, b'rb')
2385 2386 except TypeError:
2386 2387 fp = patchobj
2387 2388 try:
2388 2389 ret = applydiff(
2389 2390 ui, fp, backend, store, strip=strip, prefix=prefix, eolmode=eolmode
2390 2391 )
2391 2392 finally:
2392 2393 if fp != patchobj:
2393 2394 fp.close()
2394 2395 files.update(backend.close())
2395 2396 store.close()
2396 2397 if ret < 0:
2397 2398 raise PatchError(_(b'patch failed to apply'))
2398 2399 return ret > 0
2399 2400
2400 2401
2401 2402 def internalpatch(
2402 2403 ui,
2403 2404 repo,
2404 2405 patchobj,
2405 2406 strip,
2406 2407 prefix=b'',
2407 2408 files=None,
2408 2409 eolmode=b'strict',
2409 2410 similarity=0,
2410 2411 ):
2411 2412 """use builtin patch to apply <patchobj> to the working directory.
2412 2413 returns whether patch was applied with fuzz factor."""
2413 2414 backend = workingbackend(ui, repo, similarity)
2414 2415 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2415 2416
2416 2417
2417 2418 def patchrepo(
2418 2419 ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
2419 2420 ):
2420 2421 backend = repobackend(ui, repo, ctx, store)
2421 2422 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2422 2423
2423 2424
2424 2425 def patch(
2425 2426 ui,
2426 2427 repo,
2427 2428 patchname,
2428 2429 strip=1,
2429 2430 prefix=b'',
2430 2431 files=None,
2431 2432 eolmode=b'strict',
2432 2433 similarity=0,
2433 2434 ):
2434 2435 """Apply <patchname> to the working directory.
2435 2436
2436 2437 'eolmode' specifies how end of lines should be handled. It can be:
2437 2438 - 'strict': inputs are read in binary mode, EOLs are preserved
2438 2439 - 'crlf': EOLs are ignored when patching and reset to CRLF
2439 2440 - 'lf': EOLs are ignored when patching and reset to LF
2440 2441 - None: get it from user settings, default to 'strict'
2441 2442 'eolmode' is ignored when using an external patcher program.
2442 2443
2443 2444 Returns whether patch was applied with fuzz factor.
2444 2445 """
2445 2446 patcher = ui.config(b'ui', b'patch')
2446 2447 if files is None:
2447 2448 files = set()
2448 2449 if patcher:
2449 2450 return _externalpatch(
2450 2451 ui, repo, patcher, patchname, strip, files, similarity
2451 2452 )
2452 2453 return internalpatch(
2453 2454 ui, repo, patchname, strip, prefix, files, eolmode, similarity
2454 2455 )
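# Usage sketch (an addition, not in the original source):
#
#   files = set()
#   fuzz = patch(ui, repo, b'fix.diff', strip=1, files=files)
#   if fuzz:
#       ui.warn(b'patch applied with fuzz\n')
#
# If ui.patch names an external tool, _externalpatch() is used and
# eolmode is ignored; otherwise the builtin patcher is used.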
2455 2456
2456 2457
2457 2458 def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
2458 2459 backend = fsbackend(ui, repo.root)
2459 2460 prefix = _canonprefix(repo, prefix)
2460 2461 with open(patchpath, b'rb') as fp:
2461 2462 changed = set()
2462 2463 for state, values in iterhunks(fp):
2463 2464 if state == b'file':
2464 2465 afile, bfile, first_hunk, gp = values
2465 2466 if gp:
2466 2467 gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
2467 2468 if gp.oldpath:
2468 2469 gp.oldpath = pathtransform(
2469 2470 gp.oldpath, strip - 1, prefix
2470 2471 )[1]
2471 2472 else:
2472 2473 gp = makepatchmeta(
2473 2474 backend, afile, bfile, first_hunk, strip, prefix
2474 2475 )
2475 2476 changed.add(gp.path)
2476 2477 if gp.op == b'RENAME':
2477 2478 changed.add(gp.oldpath)
2478 2479 elif state not in (b'hunk', b'git'):
2479 2480 raise error.Abort(_(b'unsupported parser state: %s') % state)
2480 2481 return changed
2481 2482
2482 2483
2483 2484 class GitDiffRequired(Exception):
2484 2485 pass
2485 2486
2486 2487
2487 2488 diffopts = diffutil.diffallopts
2488 2489 diffallopts = diffutil.diffallopts
2489 2490 difffeatureopts = diffutil.difffeatureopts
2490 2491
2491 2492
2492 2493 def diff(
2493 2494 repo,
2494 2495 node1=None,
2495 2496 node2=None,
2496 2497 match=None,
2497 2498 changes=None,
2498 2499 opts=None,
2499 2500 losedatafn=None,
2500 2501 pathfn=None,
2501 2502 copy=None,
2502 2503 copysourcematch=None,
2503 2504 hunksfilterfn=None,
2504 2505 ):
2505 2506 '''yields diff of changes to files between two nodes, or node and
2506 2507 working directory.
2507 2508
2508 2509 if node1 is None, use first dirstate parent instead.
2509 2510 if node2 is None, compare node1 with working directory.
2510 2511
2511 2512 losedatafn(**kwarg) is a callable run when opts.upgrade=True and
2512 2513 every time some change cannot be represented with the current
2513 2514 patch format. Return False to upgrade to git patch format, True to
2514 2515 accept the loss or raise an exception to abort the diff. It is
2515 2516 called with the name of current file being diffed as 'fn'. If set
2516 2517 to None, patches will always be upgraded to git format when
2517 2518 necessary.
2518 2519
2519 2520     pathfn, if not None, is applied to every path in the diff output;
2520 2521     it is used e.g. to prepend a subrepo prefix to filenames on display.
2524 2525
2525 2526 copy, if not empty, should contain mappings {dst@y: src@x} of copy
2526 2527 information.
2527 2528
2528 2529 if copysourcematch is not None, then copy sources will be filtered by this
2529 2530 matcher
2530 2531
2531 2532 hunksfilterfn, if not None, should be a function taking a filectx and
2532 2533 hunks generator that may yield filtered hunks.
2533 2534 '''
2534 2535 if not node1 and not node2:
2535 2536 node1 = repo.dirstate.p1()
2536 2537
2537 2538 ctx1 = repo[node1]
2538 2539 ctx2 = repo[node2]
2539 2540
2540 2541 for fctx1, fctx2, hdr, hunks in diffhunks(
2541 2542 repo,
2542 2543 ctx1=ctx1,
2543 2544 ctx2=ctx2,
2544 2545 match=match,
2545 2546 changes=changes,
2546 2547 opts=opts,
2547 2548 losedatafn=losedatafn,
2548 2549 pathfn=pathfn,
2549 2550 copy=copy,
2550 2551 copysourcematch=copysourcematch,
2551 2552 ):
2552 2553 if hunksfilterfn is not None:
2553 2554 # If the file has been removed, fctx2 is None; but this should
2554 2555 # not occur here since we catch removed files early in
2555 2556 # logcmdutil.getlinerangerevs() for 'hg log -L'.
2556 2557 assert (
2557 2558 fctx2 is not None
2558 2559             ), b'fctx2 unexpectedly None in diff hunks filtering'
2559 2560 hunks = hunksfilterfn(fctx2, hunks)
2560 2561 text = b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
2561 2562 if hdr and (text or len(hdr) > 1):
2562 2563 yield b'\n'.join(hdr) + b'\n'
2563 2564 if text:
2564 2565 yield text
2565 2566
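# Usage sketch (an addition, not in the original source): render the
# diff between the working directory and its first parent:
#
#   for chunk in diff(repo, opts=diffallopts(ui)):
#       ui.write(chunk)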
2566 2567
2567 2568 def diffhunks(
2568 2569 repo,
2569 2570 ctx1,
2570 2571 ctx2,
2571 2572 match=None,
2572 2573 changes=None,
2573 2574 opts=None,
2574 2575 losedatafn=None,
2575 2576 pathfn=None,
2576 2577 copy=None,
2577 2578 copysourcematch=None,
2578 2579 ):
2579 2580 """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
2580 2581 where `header` is a list of diff headers and `hunks` is an iterable of
2581 2582 (`hunkrange`, `hunklines`) tuples.
2582 2583
2583 2584 See diff() for the meaning of parameters.
2584 2585 """
2585 2586
2586 2587 if opts is None:
2587 2588 opts = mdiff.defaultopts
2588 2589
2589 2590 def lrugetfilectx():
2590 2591 cache = {}
2591 2592 order = collections.deque()
2592 2593
2593 2594 def getfilectx(f, ctx):
2594 2595 fctx = ctx.filectx(f, filelog=cache.get(f))
2595 2596 if f not in cache:
2596 2597 if len(cache) > 20:
2597 2598 del cache[order.popleft()]
2598 2599 cache[f] = fctx.filelog()
2599 2600 else:
2600 2601 order.remove(f)
2601 2602 order.append(f)
2602 2603 return fctx
2603 2604
2604 2605 return getfilectx
2605 2606
2606 2607 getfilectx = lrugetfilectx()
2607 2608
2608 2609 if not changes:
2609 2610 changes = ctx1.status(ctx2, match=match)
2610 2611 if isinstance(changes, list):
2611 2612 modified, added, removed = changes[:3]
2612 2613 else:
2613 2614 modified, added, removed = (
2614 2615 changes.modified,
2615 2616 changes.added,
2616 2617 changes.removed,
2617 2618 )
2618 2619
2619 2620 if not modified and not added and not removed:
2620 2621 return []
2621 2622
2622 2623 if repo.ui.debugflag:
2623 2624 hexfunc = hex
2624 2625 else:
2625 2626 hexfunc = short
2626 2627 revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]
2627 2628
2628 2629 if copy is None:
2629 2630 copy = {}
2630 2631 if opts.git or opts.upgrade:
2631 2632 copy = copies.pathcopies(ctx1, ctx2, match=match)
2632 2633
2633 2634 if copysourcematch:
2634 2635 # filter out copies where source side isn't inside the matcher
2635 2636 # (copies.pathcopies() already filtered out the destination)
2636 2637 copy = {
2637 2638 dst: src
2638 2639 for dst, src in pycompat.iteritems(copy)
2639 2640 if copysourcematch(src)
2640 2641 }
2641 2642
2642 2643 modifiedset = set(modified)
2643 2644 addedset = set(added)
2644 2645 removedset = set(removed)
2645 2646 for f in modified:
2646 2647 if f not in ctx1:
2647 2648 # Fix up added, since merged-in additions appear as
2648 2649 # modifications during merges
2649 2650 modifiedset.remove(f)
2650 2651 addedset.add(f)
2651 2652 for f in removed:
2652 2653 if f not in ctx1:
2653 2654 # Merged-in additions that are then removed are reported as removed.
2654 2655             # They are not in ctx1, so we don't want to show them in the diff.
2655 2656 removedset.remove(f)
2656 2657 modified = sorted(modifiedset)
2657 2658 added = sorted(addedset)
2658 2659 removed = sorted(removedset)
2659 2660 for dst, src in list(copy.items()):
2660 2661 if src not in ctx1:
2661 2662 # Files merged in during a merge and then copied/renamed are
2662 2663 # reported as copies. We want to show them in the diff as additions.
2663 2664 del copy[dst]
2664 2665
2665 2666 prefetchmatch = scmutil.matchfiles(
2666 2667 repo, list(modifiedset | addedset | removedset)
2667 2668 )
2668 2669 scmutil.prefetchfiles(repo, [ctx1.rev(), ctx2.rev()], prefetchmatch)
2669 2670
2670 2671 def difffn(opts, losedata):
2671 2672 return trydiff(
2672 2673 repo,
2673 2674 revs,
2674 2675 ctx1,
2675 2676 ctx2,
2676 2677 modified,
2677 2678 added,
2678 2679 removed,
2679 2680 copy,
2680 2681 getfilectx,
2681 2682 opts,
2682 2683 losedata,
2683 2684 pathfn,
2684 2685 )
2685 2686
2686 2687 if opts.upgrade and not opts.git:
2687 2688 try:
2688 2689
2689 2690 def losedata(fn):
2690 2691 if not losedatafn or not losedatafn(fn=fn):
2691 2692 raise GitDiffRequired
2692 2693
2693 2694 # Buffer the whole output until we are sure it can be generated
2694 2695 return list(difffn(opts.copy(git=False), losedata))
2695 2696 except GitDiffRequired:
2696 2697 return difffn(opts.copy(git=True), None)
2697 2698 else:
2698 2699 return difffn(opts, None)
2699 2700
2700 2701
2701 2702 def diffsinglehunk(hunklines):
2702 2703 """yield tokens for a list of lines in a single hunk"""
2703 2704 for line in hunklines:
2704 2705 # chomp
2705 2706 chompline = line.rstrip(b'\r\n')
2706 2707 # highlight tabs and trailing whitespace
2707 2708 stripline = chompline.rstrip()
2708 2709 if line.startswith(b'-'):
2709 2710 label = b'diff.deleted'
2710 2711 elif line.startswith(b'+'):
2711 2712 label = b'diff.inserted'
2712 2713 else:
2713 2714 raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
2714 2715 for token in tabsplitter.findall(stripline):
2715 2716 if token.startswith(b'\t'):
2716 2717 yield (token, b'diff.tab')
2717 2718 else:
2718 2719 yield (token, label)
2719 2720
2720 2721 if chompline != stripline:
2721 2722 yield (chompline[len(stripline) :], b'diff.trailingwhitespace')
2722 2723 if chompline != line:
2723 2724 yield (line[len(chompline) :], b'')
2724 2725
2725 2726
2726 2727 def diffsinglehunkinline(hunklines):
2727 2728 """yield tokens for a list of lines in a single hunk, with inline colors"""
2728 2729     # prepare deleted and inserted content
2729 2730 a = b''
2730 2731 b = b''
2731 2732 for line in hunklines:
2732 2733 if line[0:1] == b'-':
2733 2734 a += line[1:]
2734 2735 elif line[0:1] == b'+':
2735 2736 b += line[1:]
2736 2737 else:
2737 2738 raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
2738 2739 # fast path: if either side is empty, use diffsinglehunk
2739 2740 if not a or not b:
2740 2741 for t in diffsinglehunk(hunklines):
2741 2742 yield t
2742 2743 return
2743 2744 # re-split the content into words
2744 2745 al = wordsplitter.findall(a)
2745 2746 bl = wordsplitter.findall(b)
2746 2747 # re-arrange the words to lines since the diff algorithm is line-based
2747 2748 aln = [s if s == b'\n' else s + b'\n' for s in al]
2748 2749 bln = [s if s == b'\n' else s + b'\n' for s in bl]
2749 2750 an = b''.join(aln)
2750 2751 bn = b''.join(bln)
2751 2752 # run the diff algorithm, prepare atokens and btokens
2752 2753 atokens = []
2753 2754 btokens = []
2754 2755 blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
2755 2756 for (a1, a2, b1, b2), btype in blocks:
2756 2757 changed = btype == b'!'
2757 2758 for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
2758 2759 atokens.append((changed, token))
2759 2760 for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
2760 2761 btokens.append((changed, token))
2761 2762
2762 2763 # yield deleted tokens, then inserted ones
2763 2764 for prefix, label, tokens in [
2764 2765 (b'-', b'diff.deleted', atokens),
2765 2766 (b'+', b'diff.inserted', btokens),
2766 2767 ]:
2767 2768 nextisnewline = True
2768 2769 for changed, token in tokens:
2769 2770 if nextisnewline:
2770 2771 yield (prefix, label)
2771 2772 nextisnewline = False
2772 2773             # special handling of line endings
2773 2774 isendofline = token.endswith(b'\n')
2774 2775 if isendofline:
2775 2776 chomp = token[:-1] # chomp
2776 2777 if chomp.endswith(b'\r'):
2777 2778 chomp = chomp[:-1]
2778 2779 endofline = token[len(chomp) :]
2779 2780 token = chomp.rstrip() # detect spaces at the end
2780 2781 endspaces = chomp[len(token) :]
2781 2782 # scan tabs
2782 2783 for maybetab in tabsplitter.findall(token):
2783 2784 if b'\t' == maybetab[0:1]:
2784 2785 currentlabel = b'diff.tab'
2785 2786 else:
2786 2787 if changed:
2787 2788 currentlabel = label + b'.changed'
2788 2789 else:
2789 2790 currentlabel = label + b'.unchanged'
2790 2791 yield (maybetab, currentlabel)
2791 2792 if isendofline:
2792 2793 if endspaces:
2793 2794 yield (endspaces, b'diff.trailingwhitespace')
2794 2795 yield (endofline, b'')
2795 2796 nextisnewline = True
2796 2797
2797 2798
2798 2799 def difflabel(func, *args, **kw):
2799 2800 '''yields 2-tuples of (output, label) based on the output of func()'''
2800 2801 if kw.get('opts') and kw['opts'].worddiff:
2801 2802 dodiffhunk = diffsinglehunkinline
2802 2803 else:
2803 2804 dodiffhunk = diffsinglehunk
2804 2805 headprefixes = [
2805 2806 (b'diff', b'diff.diffline'),
2806 2807 (b'copy', b'diff.extended'),
2807 2808 (b'rename', b'diff.extended'),
2808 2809 (b'old', b'diff.extended'),
2809 2810 (b'new', b'diff.extended'),
2810 2811 (b'deleted', b'diff.extended'),
2811 2812 (b'index', b'diff.extended'),
2812 2813 (b'similarity', b'diff.extended'),
2813 2814 (b'---', b'diff.file_a'),
2814 2815 (b'+++', b'diff.file_b'),
2815 2816 ]
2816 2817 textprefixes = [
2817 2818 (b'@', b'diff.hunk'),
2818 2819 # - and + are handled by diffsinglehunk
2819 2820 ]
2820 2821 head = False
2821 2822
2822 2823 # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
2823 2824 hunkbuffer = []
2824 2825
2825 2826 def consumehunkbuffer():
2826 2827 if hunkbuffer:
2827 2828 for token in dodiffhunk(hunkbuffer):
2828 2829 yield token
2829 2830 hunkbuffer[:] = []
2830 2831
2831 2832 for chunk in func(*args, **kw):
2832 2833 lines = chunk.split(b'\n')
2833 2834 linecount = len(lines)
2834 2835 for i, line in enumerate(lines):
2835 2836 if head:
2836 2837 if line.startswith(b'@'):
2837 2838 head = False
2838 2839 else:
2839 2840 if line and not line.startswith(
2840 2841 (b' ', b'+', b'-', b'@', b'\\')
2841 2842 ):
2842 2843 head = True
2843 2844 diffline = False
2844 2845 if not head and line and line.startswith((b'+', b'-')):
2845 2846 diffline = True
2846 2847
2847 2848 prefixes = textprefixes
2848 2849 if head:
2849 2850 prefixes = headprefixes
2850 2851 if diffline:
2851 2852 # buffered
2852 2853 bufferedline = line
2853 2854 if i + 1 < linecount:
2854 2855 bufferedline += b"\n"
2855 2856 hunkbuffer.append(bufferedline)
2856 2857 else:
2857 2858 # unbuffered
2858 2859 for token in consumehunkbuffer():
2859 2860 yield token
2860 2861 stripline = line.rstrip()
2861 2862 for prefix, label in prefixes:
2862 2863 if stripline.startswith(prefix):
2863 2864 yield (stripline, label)
2864 2865 if line != stripline:
2865 2866 yield (
2866 2867 line[len(stripline) :],
2867 2868 b'diff.trailingwhitespace',
2868 2869 )
2869 2870 break
2870 2871 else:
2871 2872 yield (line, b'')
2872 2873 if i + 1 < linecount:
2873 2874 yield (b'\n', b'')
2874 2875 for token in consumehunkbuffer():
2875 2876 yield token
2876 2877
2877 2878
2878 2879 def diffui(*args, **kw):
2879 2880 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
2880 2881 return difflabel(diff, *args, **kw)
2881 2882
2882 2883
2883 2884 def _filepairs(modified, added, removed, copy, opts):
2884 2885 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2885 2886     before and f2 is the name after. For added files, f1 will be None,
2886 2887 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2887 2888 or 'rename' (the latter two only if opts.git is set).'''
2888 2889 gone = set()
2889 2890
2890 2891     copyto = {v: k for k, v in copy.items()}
2891 2892
2892 2893 addedset, removedset = set(added), set(removed)
2893 2894
2894 2895 for f in sorted(modified + added + removed):
2895 2896 copyop = None
2896 2897 f1, f2 = f, f
2897 2898 if f in addedset:
2898 2899 f1 = None
2899 2900 if f in copy:
2900 2901 if opts.git:
2901 2902 f1 = copy[f]
2902 2903 if f1 in removedset and f1 not in gone:
2903 2904 copyop = b'rename'
2904 2905 gone.add(f1)
2905 2906 else:
2906 2907 copyop = b'copy'
2907 2908 elif f in removedset:
2908 2909 f2 = None
2909 2910 if opts.git:
2910 2911 # have we already reported a copy above?
2911 2912 if (
2912 2913 f in copyto
2913 2914 and copyto[f] in addedset
2914 2915 and copy[copyto[f]] == f
2915 2916 ):
2916 2917 continue
2917 2918 yield f1, f2, copyop
2918 2919
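# Illustrative sketch (an addition, not in the original source): with
# opts.git set, added=[b'b'], removed=[b'a'] and copy={b'b': b'a'},
# _filepairs() yields the single pair (b'a', b'b', b'rename'); the
# removed side is suppressed by the copyto check so the rename is
# reported only once.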
2919 2920
2920 2921 def trydiff(
2921 2922 repo,
2922 2923 revs,
2923 2924 ctx1,
2924 2925 ctx2,
2925 2926 modified,
2926 2927 added,
2927 2928 removed,
2928 2929 copy,
2929 2930 getfilectx,
2930 2931 opts,
2931 2932 losedatafn,
2932 2933 pathfn,
2933 2934 ):
2934 2935 '''given input data, generate a diff and yield it in blocks
2935 2936
2936 2937 If generating a diff would lose data like flags or binary data and
2937 2938 losedatafn is not None, it will be called.
2938 2939
2939 2940 pathfn is applied to every path in the diff output.
2940 2941 '''
2941 2942
2942 2943 def gitindex(text):
2943 2944 if not text:
2944 2945 text = b""
2945 2946 l = len(text)
2946 2947 s = hashutil.sha1(b'blob %d\0' % l)
2947 2948 s.update(text)
2948 2949 return hex(s.digest())
2949 2950
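    # Sketch (an addition, not in the original source): gitindex() mirrors
    # git's blob hashing, so gitindex(b'') is the well-known empty-blob id
    # e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.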
2950 2951 if opts.noprefix:
2951 2952 aprefix = bprefix = b''
2952 2953 else:
2953 2954 aprefix = b'a/'
2954 2955 bprefix = b'b/'
2955 2956
2956 2957 def diffline(f, revs):
2957 2958 revinfo = b' '.join([b"-r %s" % rev for rev in revs])
2958 2959 return b'diff %s %s' % (revinfo, f)
2959 2960
2960 2961 def isempty(fctx):
2961 2962 return fctx is None or fctx.size() == 0
2962 2963
2963 2964 date1 = dateutil.datestr(ctx1.date())
2964 2965 date2 = dateutil.datestr(ctx2.date())
2965 2966
2966 2967 gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}
2967 2968
2968 2969 if not pathfn:
2969 2970 pathfn = lambda f: f
2970 2971
2971 2972 for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
2972 2973 content1 = None
2973 2974 content2 = None
2974 2975 fctx1 = None
2975 2976 fctx2 = None
2976 2977 flag1 = None
2977 2978 flag2 = None
2978 2979 if f1:
2979 2980 fctx1 = getfilectx(f1, ctx1)
2980 2981 if opts.git or losedatafn:
2981 2982 flag1 = ctx1.flags(f1)
2982 2983 if f2:
2983 2984 fctx2 = getfilectx(f2, ctx2)
2984 2985 if opts.git or losedatafn:
2985 2986 flag2 = ctx2.flags(f2)
2986 2987 # if binary is True, output "summary" or "base85", but not "text diff"
2987 2988 if opts.text:
2988 2989 binary = False
2989 2990 else:
2990 2991 binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)
2991 2992
2992 2993 if losedatafn and not opts.git:
2993 2994 if (
2994 2995 binary
2995 2996 or
2996 2997 # copy/rename
2997 2998 f2 in copy
2998 2999 or
2999 3000 # empty file creation
3000 3001 (not f1 and isempty(fctx2))
3001 3002 or
3002 3003 # empty file deletion
3003 3004 (isempty(fctx1) and not f2)
3004 3005 or
3005 3006 # create with flags
3006 3007 (not f1 and flag2)
3007 3008 or
3008 3009 # change flags
3009 3010 (f1 and f2 and flag1 != flag2)
3010 3011 ):
3011 3012 losedatafn(f2 or f1)
3012 3013
3013 3014 path1 = pathfn(f1 or f2)
3014 3015 path2 = pathfn(f2 or f1)
3015 3016 header = []
3016 3017 if opts.git:
3017 3018 header.append(
3018 3019 b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
3019 3020 )
3020 3021 if not f1: # added
3021 3022 header.append(b'new file mode %s' % gitmode[flag2])
3022 3023 elif not f2: # removed
3023 3024 header.append(b'deleted file mode %s' % gitmode[flag1])
3024 3025 else: # modified/copied/renamed
3025 3026 mode1, mode2 = gitmode[flag1], gitmode[flag2]
3026 3027 if mode1 != mode2:
3027 3028 header.append(b'old mode %s' % mode1)
3028 3029 header.append(b'new mode %s' % mode2)
3029 3030 if copyop is not None:
3030 3031 if opts.showsimilarity:
3031 3032 sim = similar.score(ctx1[path1], ctx2[path2]) * 100
3032 3033 header.append(b'similarity index %d%%' % sim)
3033 3034 header.append(b'%s from %s' % (copyop, path1))
3034 3035 header.append(b'%s to %s' % (copyop, path2))
3035 3036 elif revs:
3036 3037 header.append(diffline(path1, revs))
3037 3038
        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | emitted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is emitted, so fctx.data() cannot be faked
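        # To illustrate the table above: `hg diff --git` on a changed binary
        # file takes the "base85" row and emits a "GIT binary patch"
        # section, while plain `hg diff` takes the first "summary" row and
        # only reports that the binary file changed.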
        if binary and (
            not opts.git or (opts.git and opts.nobinary and not opts.index)
        ):
            # fast path: no binary content will be displayed; content1 and
            # content2 are only used for an equivalence test, and cmp() may
            # have a fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0'  # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        if binary and opts.git and not opts.nobinary:
            text = mdiff.b85diff(content1, content2)
            if text:
                header.append(
                    b'index %s..%s' % (gitindex(content1), gitindex(content2))
                )
            hunks = ((None, [text]),)
        else:
            if opts.git and opts.index > 0:
                flag = flag1
                if flag is None:
                    flag = flag2
                header.append(
                    b'index %s..%s %s'
                    % (
                        gitindex(content1)[0 : opts.index],
                        gitindex(content2)[0 : opts.index],
                        gitmode[flag],
                    )
                )

            uheaders, hunks = mdiff.unidiff(
                content1,
                date1,
                content2,
                date2,
                path1,
                path2,
                binary=binary,
                opts=opts,
            )
            header.extend(uheaders)
        yield fctx1, fctx2, header, hunks


def diffstatsum(stats):
    maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
    for f, a, r, b in stats:
        maxfile = max(maxfile, encoding.colwidth(f))
        maxtotal = max(maxtotal, a + r)
        addtotal += a
        removetotal += r
        binary = binary or b

    return maxfile, maxtotal, addtotal, removetotal, binary
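# An illustrative run with hypothetical stats: for
# [(b'a', 2, 1, False), (b'bb', 0, 3, True)], diffstatsum() should return
# (2, 3, 2, 4, True) -- widest filename 2 columns, at most 3 changed lines
# in one file, 2 additions and 4 removals overall, and a binary entry seen.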


def diffstatdata(lines):
    diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')

    results = []
    filename, adds, removes, isbinary = None, 0, 0, False

    def addresult():
        if filename:
            results.append((filename, adds, removes, isbinary))

    # inheader is used to track if a line is in the
    # header portion of the diff. This helps properly account
    # for lines that start with '--' or '++'
    inheader = False

    for line in lines:
        if line.startswith(b'diff'):
            addresult()
            # starting a new file diff
            # set numbers to 0 and reset inheader
            inheader = True
            adds, removes, isbinary = 0, 0, False
            if line.startswith(b'diff --git a/'):
                filename = gitre.search(line).group(2)
            elif line.startswith(b'diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith(b'@@'):
            inheader = False
        elif line.startswith(b'+') and not inheader:
            adds += 1
        elif line.startswith(b'-') and not inheader:
            removes += 1
        elif line.startswith(b'GIT binary patch') or line.startswith(
            b'Binary file'
        ):
            isbinary = True
        elif line.startswith(b'rename from'):
            filename = line[12:]
        elif line.startswith(b'rename to'):
            filename += b' => %s' % line[10:]
    addresult()
    return results
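# A small illustrative parse with hypothetical input: the lines
#
#   [b'diff -r 000000000000 -r 111111111111 foo',
#    b'@@ -1,1 +1,1 @@', b'-old', b'+new']
#
# should yield [(b'foo', 1, 1, False)]: one file, one line added, one line
# removed, not binary.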


def diffstat(lines, width=80):
    output = []
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    countwidth = len(str(maxtotal))
    if hasbinary and countwidth < 3:
        countwidth = 3
    graphwidth = width - countwidth - maxname - 6
    if graphwidth < 10:
        graphwidth = 10

    def scale(i):
        if maxtotal <= graphwidth:
            return i
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(i * graphwidth // maxtotal, int(bool(i)))

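    # An illustrative scaling, assuming maxtotal == 200 and graphwidth == 50:
    # scale(100) == 25, while scale(1) == 1 rather than 0, so even tiny
    # changes draw at least one '+' or '-'.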
    for filename, adds, removes, isbinary in stats:
        if isbinary:
            count = b'Bin'
        else:
            count = b'%d' % (adds + removes)
        pluses = b'+' * scale(adds)
        minuses = b'-' * scale(removes)
        output.append(
            b' %s%s |  %*s %s%s\n'
            % (
                filename,
                b' ' * (maxname - encoding.colwidth(filename)),
                countwidth,
                count,
                pluses,
                minuses,
            )
        )

    if stats:
        output.append(
            _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
            % (len(stats), totaladds, totalremoves)
        )

    return b''.join(output)
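# An illustrative result: for a single file b'foo' with one added and one
# removed line, diffstat() should return something like
#
#   b' foo |  2 +-\n 1 files changed, 1 insertions(+), 1 deletions(-)\n'
#
# where the exact column widths are derived from diffstatsum() above.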


def diffstatui(*args, **kw):
    '''like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()
    '''

    for line in diffstat(*args, **kw).splitlines():
        if line and line[-1] in b'+-':
            name, graph = line.rsplit(b' ', 1)
            yield (name + b' ', b'')
            m = re.search(br'\++', graph)
            if m:
                yield (m.group(0), b'diffstat.inserted')
            m = re.search(br'-+', graph)
            if m:
                yield (m.group(0), b'diffstat.deleted')
        else:
            yield (line, b'')
        yield (b'\n', b'')
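# An illustrative expansion: for a diffstat line such as b' foo |  2 +-',
# diffstatui() should yield roughly (b' foo |  2 ', b''),
# (b'+', b'diffstat.inserted'), (b'-', b'diffstat.deleted') and
# (b'\n', b''), letting ui.write() colorize insertions and deletions.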