pycompat: provide 'ispy3' constant...
Yuya Nishihara
r30030:0f6d6fdd default
@@ -1,1621 +1,1622 b''
1 1 # bundle2.py - generic container format to transmit arbitrary data.
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """Handling of the new bundle2 format
8 8
9 9 The goal of bundle2 is to act as an atomic container to transmit a set of
10 10 payloads in an application agnostic way. It consists of a sequence of "parts"
11 11 that will be handed to and processed by the application layer.
12 12
13 13
14 14 General format architecture
15 15 ===========================
16 16
17 17 The format is structured as follows:
18 18
19 19 - magic string
20 20 - stream level parameters
21 21 - payload parts (any number)
22 22 - end of stream marker.
23 23
24 24 The binary format
25 25 ============================
26 26
27 27 All numbers are unsigned and big-endian.
28 28
29 29 stream level parameters
30 30 ------------------------
31 31
32 32 The binary format is as follows:
33 33
34 34 :params size: int32
35 35
36 36 The total number of bytes used by the parameters.
37 37
38 38 :params value: arbitrary number of bytes
39 39
40 40 A blob of `params size` containing the serialized version of all stream level
41 41 parameters.
42 42
43 43 The blob contains a space separated list of parameters. Parameters with a
44 44 value are stored in the form `<name>=<value>`. Both name and value are urlquoted.
45 45
46 46 Empty names are forbidden.
47 47
48 48 Names MUST start with a letter. If the first letter is lower case, the
49 49 parameter is advisory and can be safely ignored. However, when the first
50 50 letter is upper case, the parameter is mandatory and the bundling process
51 51 MUST stop if it is not able to process it.
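
For illustration, a stream carrying the mandatory `Compression` parameter
(value `BZ`) alongside a hypothetical advisory `caching` parameter would
serialize its parameter blob as::

    Compression=BZ caching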
52 52
53 53 Stream parameters use a simple textual format for two main reasons:
54 54
55 55 - Stream level parameters should remain simple and we want to discourage any
56 56 crazy usage.
57 57 - Textual data allow easy human inspection of a bundle2 header in case of
58 58 troubles.
59 59
60 60 Any application level options MUST go into a bundle2 part instead.
61 61
62 62 Payload part
63 63 ------------------------
64 64
65 65 The binary format is as follows:
66 66
67 67 :header size: int32
68 68
69 69 The total number of bytes used by the part header. When the header is empty
70 70 (size = 0) this is interpreted as the end of stream marker.
71 71
72 72 :header:
73 73
74 74 The header defines how to interpret the part. It contains two pieces of
75 75 data: the part type, and the part parameters.
76 76
77 77 The part type is used to route the part to an application level handler
78 78 that can interpret its payload.
79 79
80 80 Part parameters are passed to the application level handler. They are
81 81 meant to convey information that will help the handler interpret the
82 82 part payload.
83 83
84 84 The binary format of the header is as follows:
85 85
86 86 :typesize: (one byte)
87 87
88 88 :parttype: alphanumerical part name (restricted to [a-zA-Z0-9_:-]*)
89 89
90 90 :partid: A 32-bit integer (unique in the bundle) that can be used to refer
91 91 to this part.
92 92
93 93 :parameters:
94 94
95 95 Part parameters may have arbitrary content; the binary structure is::
96 96
97 97 <mandatory-count><advisory-count><param-sizes><param-data>
98 98
99 99 :mandatory-count: 1 byte, number of mandatory parameters
100 100
101 101 :advisory-count: 1 byte, number of advisory parameters
102 102
103 103 :param-sizes:
104 104
105 105 N pairs of bytes, where N is the total number of parameters. Each
106 106 pair contains (<size-of-key>, <size-of-value>) for one parameter.
107 107
108 108 :param-data:
109 109
110 110 A blob of bytes from which each parameter key and value can be
111 111 retrieved using the list of size pairs stored in the previous
112 112 field.
113 113
114 114 Mandatory parameters come first, followed by the advisory ones.
115 115
116 116 Each parameter's key MUST be unique within the part.
117 117
118 118 :payload:
119 119
120 120 The payload is a series of `<chunksize><chunkdata>`.
121 121
122 122 `chunksize` is an int32, `chunkdata` is plain bytes (exactly as many as
123 123 `chunksize` says). The payload part is concluded by a zero size chunk.
124 124
125 125 The current implementation always produces either zero or one chunk.
126 126 This is an implementation limitation that will ultimately be lifted.
127 127
128 128 `chunksize` can be negative to trigger special case processing. No such
129 129 processing is in place yet.
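
For illustration, an advisory part of type `output` with id 0 and no
parameters is announced by an int32 header size of 13, followed by the 13
bytes header itself: `\x06output` (typesize and parttype),
`\x00\x00\x00\x00` (partid) and `\x00\x00` (the two parameter counts),
then by the payload chunks.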
130 130
131 131 Bundle processing
132 132 ============================
133 133
134 134 Each part is processed in order using a "part handler". Handlers are
135 135 registered for a certain part type.
136 136
137 137 The matching of a part to its handler is case insensitive. The case of the
138 138 part type is used to know if a part is mandatory or advisory. If the part
139 139 type contains any uppercase character it is considered mandatory. When no
140 140 handler is known for a mandatory part, the process is aborted and an
141 141 exception is raised. If the part is advisory and no handler is known, the
142 142 part is ignored. When the process is aborted, the full bundle is still read
143 143 from the stream to keep the channel usable. But none of the parts read after
144 144 an abort are processed. In the future, dropping the stream may become an
145 145 option for channels we do not care to preserve.
146 146 """
147 147
148 148 from __future__ import absolute_import
149 149
150 150 import errno
151 151 import re
152 152 import string
153 153 import struct
154 154 import sys
155 155
156 156 from .i18n import _
157 157 from . import (
158 158 changegroup,
159 159 error,
160 160 obsolete,
161 161 pushkey,
162 pycompat,
162 163 tags,
163 164 url,
164 165 util,
165 166 )
166 167
167 168 urlerr = util.urlerr
168 169 urlreq = util.urlreq
169 170
170 171 _pack = struct.pack
171 172 _unpack = struct.unpack
172 173
173 174 _fstreamparamsize = '>i'
174 175 _fpartheadersize = '>i'
175 176 _fparttypesize = '>B'
176 177 _fpartid = '>I'
177 178 _fpayloadsize = '>i'
178 179 _fpartparamcount = '>BB'
179 180
180 181 preferedchunksize = 4096
181 182
182 183 _parttypeforbidden = re.compile('[^a-zA-Z0-9_:-]')
183 184
184 185 def outdebug(ui, message):
185 186 """debug regarding output stream (bundling)"""
186 187 if ui.configbool('devel', 'bundle2.debug', False):
187 188 ui.debug('bundle2-output: %s\n' % message)
188 189
189 190 def indebug(ui, message):
190 191 """debug on input stream (unbundling)"""
191 192 if ui.configbool('devel', 'bundle2.debug', False):
192 193 ui.debug('bundle2-input: %s\n' % message)
193 194
194 195 def validateparttype(parttype):
195 196 """raise ValueError if a parttype contains invalid character"""
196 197 if _parttypeforbidden.search(parttype):
197 198 raise ValueError(parttype)
198 199
199 200 def _makefpartparamsizes(nbparams):
200 201 """return a struct format to read part parameter sizes
201 202
202 203 The number of parameters is variable so we need to build that format
203 204 dynamically.
204 205 """
205 206 return '>'+('BB'*nbparams)
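
# A small illustrative sketch of the helper above: _makefpartparamsizes(2)
# returns '>BBBB', which unpacks two (key-size, value-size) byte pairs in a
# single call:
#
#     >>> struct.unpack(_makefpartparamsizes(2), b'\x03\x05\x02\x04')
#     (3, 5, 2, 4)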
206 207
207 208 parthandlermapping = {}
208 209
209 210 def parthandler(parttype, params=()):
210 211 """decorator that register a function as a bundle2 part handler
211 212
212 213 eg::
213 214
214 215 @parthandler('myparttype', ('mandatory', 'param', 'handled'))
215 216 def myparttypehandler(...):
216 217 '''process a part of type "my part".'''
217 218 ...
218 219 """
219 220 validateparttype(parttype)
220 221 def _decorator(func):
221 222 lparttype = parttype.lower() # enforce lower case matching.
222 223 assert lparttype not in parthandlermapping
223 224 parthandlermapping[lparttype] = func
224 225 func.params = frozenset(params)
225 226 return func
226 227 return _decorator
227 228
228 229 class unbundlerecords(object):
229 230 """keep record of what happens during and unbundle
230 231
231 232 New records are added using `records.add('cat', obj)`, where 'cat' is a
232 233 category of record and obj is an arbitrary object.
233 234
234 235 `records['cat']` will return all entries of this category 'cat'.
235 236
236 237 Iterating on the object itself will yield `('category', obj)` tuples
237 238 for all entries.
238 239
239 240 All iterations happens in chronological order.
240 241 """
241 242
242 243 def __init__(self):
243 244 self._categories = {}
244 245 self._sequences = []
245 246 self._replies = {}
246 247
247 248 def add(self, category, entry, inreplyto=None):
248 249 """add a new record of a given category.
249 250
250 251 The entry can then be retrieved in the list returned by
251 252 self['category']."""
252 253 self._categories.setdefault(category, []).append(entry)
253 254 self._sequences.append((category, entry))
254 255 if inreplyto is not None:
255 256 self.getreplies(inreplyto).add(category, entry)
256 257
257 258 def getreplies(self, partid):
258 259 """get the records that are replies to a specific part"""
259 260 return self._replies.setdefault(partid, unbundlerecords())
260 261
261 262 def __getitem__(self, cat):
262 263 return tuple(self._categories.get(cat, ()))
263 264
264 265 def __iter__(self):
265 266 return iter(self._sequences)
266 267
267 268 def __len__(self):
268 269 return len(self._sequences)
269 270
270 271 def __nonzero__(self):
271 272 return bool(self._sequences)
272 273
273 274 class bundleoperation(object):
274 275 """an object that represents a single bundling process
275 276
276 277 Its purpose is to carry unbundle-related objects and states.
277 278
278 279 A new object should be created at the beginning of each bundle processing.
279 280 The object is to be returned by the processing function.
280 281
281 282 The object currently has very little content; it will ultimately contain:
282 283 * access to the repo the bundle is applied to,
283 284 * a ui object,
284 285 * a way to retrieve a transaction to add changes to the repo,
285 286 * a way to record the result of processing each part,
286 287 * a way to construct a bundle response when applicable.
287 288 """
288 289
289 290 def __init__(self, repo, transactiongetter, captureoutput=True):
290 291 self.repo = repo
291 292 self.ui = repo.ui
292 293 self.records = unbundlerecords()
293 294 self.gettransaction = transactiongetter
294 295 self.reply = None
295 296 self.captureoutput = captureoutput
296 297
297 298 class TransactionUnavailable(RuntimeError):
298 299 pass
299 300
300 301 def _notransaction():
301 302 """default method to get a transaction while processing a bundle
302 303
303 304 Raise an exception to highlight the fact that no transaction was expected
304 305 to be created"""
305 306 raise TransactionUnavailable()
306 307
307 308 def applybundle(repo, unbundler, tr, source=None, url=None, op=None):
308 309 # transform me into unbundler.apply() as soon as the freeze is lifted
309 310 tr.hookargs['bundle2'] = '1'
310 311 if source is not None and 'source' not in tr.hookargs:
311 312 tr.hookargs['source'] = source
312 313 if url is not None and 'url' not in tr.hookargs:
313 314 tr.hookargs['url'] = url
314 315 return processbundle(repo, unbundler, lambda: tr, op=op)
315 316
316 317 def processbundle(repo, unbundler, transactiongetter=None, op=None):
317 318 """This function process a bundle, apply effect to/from a repo
318 319
319 320 It iterates over each part then searches for and uses the proper handling
320 321 code to process the part. Parts are processed in order.
321 322
322 323 This is a very early version of this function that will be strongly reworked
323 324 before final usage.
324 325
325 326 An unknown mandatory part will abort the process.
326 327
327 328 It is temporarily possible to provide a prebuilt bundleoperation to the
328 329 function. This is used to ensure output is properly propagated in case of
329 330 an error during the unbundling. This output capturing part will likely be
330 331 reworked and this ability will probably go away in the process.
331 332 """
332 333 if op is None:
333 334 if transactiongetter is None:
334 335 transactiongetter = _notransaction
335 336 op = bundleoperation(repo, transactiongetter)
336 337 # todo:
337 338 # - replace this with an init function soon.
338 339 # - exception catching
339 340 unbundler.params
340 341 if repo.ui.debugflag:
341 342 msg = ['bundle2-input-bundle:']
342 343 if unbundler.params:
343 344 msg.append(' %i params' % len(unbundler.params))
344 345 if op.gettransaction is None:
345 346 msg.append(' no-transaction')
346 347 else:
347 348 msg.append(' with-transaction')
348 349 msg.append('\n')
349 350 repo.ui.debug(''.join(msg))
350 351 iterparts = enumerate(unbundler.iterparts())
351 352 part = None
352 353 nbpart = 0
353 354 try:
354 355 for nbpart, part in iterparts:
355 356 _processpart(op, part)
356 357 except Exception as exc:
357 358 for nbpart, part in iterparts:
358 359 # consume the bundle content
359 360 part.seek(0, 2)
360 361 # Small hack to let caller code distinguish exceptions raised during
361 362 # bundle2 processing from those raised when processing the old format.
362 363 # This is mostly needed to handle the different return codes of unbundle
363 364 # according to the type of bundle. We should probably clean up or drop
364 365 # this return code craziness in a future version.
365 366 exc.duringunbundle2 = True
366 367 salvaged = []
367 368 replycaps = None
368 369 if op.reply is not None:
369 370 salvaged = op.reply.salvageoutput()
370 371 replycaps = op.reply.capabilities
371 372 exc._replycaps = replycaps
372 373 exc._bundle2salvagedoutput = salvaged
373 374 raise
374 375 finally:
375 376 repo.ui.debug('bundle2-input-bundle: %i parts total\n' % nbpart)
376 377
377 378 return op
378 379
379 380 def _processpart(op, part):
380 381 """process a single part from a bundle
381 382
382 383 The part is guaranteed to have been fully consumed when the function exits
383 384 (even if an exception is raised)."""
384 385 status = 'unknown' # used by debug output
385 386 hardabort = False
386 387 try:
387 388 try:
388 389 handler = parthandlermapping.get(part.type)
389 390 if handler is None:
390 391 status = 'unsupported-type'
391 392 raise error.BundleUnknownFeatureError(parttype=part.type)
392 393 indebug(op.ui, 'found a handler for part %r' % part.type)
393 394 unknownparams = part.mandatorykeys - handler.params
394 395 if unknownparams:
395 396 unknownparams = list(unknownparams)
396 397 unknownparams.sort()
397 398 status = 'unsupported-params (%s)' % unknownparams
398 399 raise error.BundleUnknownFeatureError(parttype=part.type,
399 400 params=unknownparams)
400 401 status = 'supported'
401 402 except error.BundleUnknownFeatureError as exc:
402 403 if part.mandatory: # mandatory parts
403 404 raise
404 405 indebug(op.ui, 'ignoring unsupported advisory part %s' % exc)
405 406 return # skip to part processing
406 407 finally:
407 408 if op.ui.debugflag:
408 409 msg = ['bundle2-input-part: "%s"' % part.type]
409 410 if not part.mandatory:
410 411 msg.append(' (advisory)')
411 412 nbmp = len(part.mandatorykeys)
412 413 nbap = len(part.params) - nbmp
413 414 if nbmp or nbap:
414 415 msg.append(' (params:')
415 416 if nbmp:
416 417 msg.append(' %i mandatory' % nbmp)
417 418 if nbap:
418 419 msg.append(' %i advisory' % nbap)
419 420 msg.append(')')
420 421 msg.append(' %s\n' % status)
421 422 op.ui.debug(''.join(msg))
422 423
423 424 # handler is called outside the above try block so that we don't
424 425 # risk catching KeyErrors from anything other than the
425 426 # parthandlermapping lookup (any KeyError raised by handler()
426 427 # itself represents a defect of a different variety).
427 428 output = None
428 429 if op.captureoutput and op.reply is not None:
429 430 op.ui.pushbuffer(error=True, subproc=True)
430 431 output = ''
431 432 try:
432 433 handler(op, part)
433 434 finally:
434 435 if output is not None:
435 436 output = op.ui.popbuffer()
436 437 if output:
437 438 outpart = op.reply.newpart('output', data=output,
438 439 mandatory=False)
439 440 outpart.addparam('in-reply-to', str(part.id), mandatory=False)
440 441 # If exiting or interrupted, do not attempt to seek the stream in the
441 442 # finally block below. This makes abort faster.
442 443 except (SystemExit, KeyboardInterrupt):
443 444 hardabort = True
444 445 raise
445 446 finally:
446 447 # consume the part content to not corrupt the stream.
447 448 if not hardabort:
448 449 part.seek(0, 2)
449 450
450 451
451 452 def decodecaps(blob):
452 453 """decode a bundle2 caps bytes blob into a dictionary
453 454
454 455 The blob is a list of capabilities (one per line)
455 456 Capabilities may have values using a line of the form::
456 457
457 458 capability=value1,value2,value3
458 459
459 460 The values are always a list."""
460 461 caps = {}
461 462 for line in blob.splitlines():
462 463 if not line:
463 464 continue
464 465 if '=' not in line:
465 466 key, vals = line, ()
466 467 else:
467 468 key, vals = line.split('=', 1)
468 469 vals = vals.split(',')
469 470 key = urlreq.unquote(key)
470 471 vals = [urlreq.unquote(v) for v in vals]
471 472 caps[key] = vals
472 473 return caps
473 474
474 475 def encodecaps(caps):
475 476 """encode a bundle2 caps dictionary into a bytes blob"""
476 477 chunks = []
477 478 for ca in sorted(caps):
478 479 vals = caps[ca]
479 480 ca = urlreq.quote(ca)
480 481 vals = [urlreq.quote(v) for v in vals]
481 482 if vals:
482 483 ca = "%s=%s" % (ca, ','.join(vals))
483 484 chunks.append(ca)
484 485 return '\n'.join(chunks)
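
# An illustrative round-trip between the two helpers above (the capability
# names are chosen for the example only):
#
#     >>> blob = encodecaps({'HG20': (), 'digests': ['md5', 'sha1']})
#     >>> blob
#     'HG20\ndigests=md5,sha1'
#     >>> decodecaps(blob) == {'HG20': [], 'digests': ['md5', 'sha1']}
#     True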
485 486
486 487 bundletypes = {
487 488 "": ("", None), # only when using unbundle on ssh and old http servers
488 489 # since the unification ssh accepts a header but there
489 490 # is no capability signaling it.
490 491 "HG20": (), # special-cased below
491 492 "HG10UN": ("HG10UN", None),
492 493 "HG10BZ": ("HG10", 'BZ'),
493 494 "HG10GZ": ("HG10GZ", 'GZ'),
494 495 }
495 496
496 497 # hgweb uses this list to communicate its preferred type
497 498 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
498 499
499 500 class bundle20(object):
500 501 """represent an outgoing bundle2 container
501 502
502 503 Use the `addparam` method to add a stream level parameter and `newpart` to
503 504 populate it. Then call `getchunks` to retrieve all the binary chunks of
504 505 data that compose the bundle2 container."""
505 506
506 507 _magicstring = 'HG20'
507 508
508 509 def __init__(self, ui, capabilities=()):
509 510 self.ui = ui
510 511 self._params = []
511 512 self._parts = []
512 513 self.capabilities = dict(capabilities)
513 514 self._compressor = util.compressors[None]()
514 515
515 516 def setcompression(self, alg):
516 517 """setup core part compression to <alg>"""
517 518 if alg is None:
518 519 return
519 520 assert not any(n.lower() == 'compression' for n, v in self._params)
520 521 self.addparam('Compression', alg)
521 522 self._compressor = util.compressors[alg]()
522 523
523 524 @property
524 525 def nbparts(self):
525 526 """total number of parts added to the bundler"""
526 527 return len(self._parts)
527 528
528 529 # methods used to define the bundle2 content
529 530 def addparam(self, name, value=None):
530 531 """add a stream level parameter"""
531 532 if not name:
532 533 raise ValueError('empty parameter name')
533 534 if name[0] not in string.letters:
534 535 raise ValueError('non letter first character: %r' % name)
535 536 self._params.append((name, value))
536 537
537 538 def addpart(self, part):
538 539 """add a new part to the bundle2 container
539 540
540 541 Parts contain the actual application level payload."""
541 542 assert part.id is None
542 543 part.id = len(self._parts) # very cheap counter
543 544 self._parts.append(part)
544 545
545 546 def newpart(self, typeid, *args, **kwargs):
546 547 """create a new part and add it to the containers
547 548
548 549 As the part is directly added to the containers. For now, this means
549 550 that any failure to properly initialize the part after calling
550 551 ``newpart`` should result in a failure of the whole bundling process.
551 552
552 553 You can still fall back to manually create and add if you need better
553 554 control."""
554 555 part = bundlepart(typeid, *args, **kwargs)
555 556 self.addpart(part)
556 557 return part
557 558
558 559 # methods used to generate the bundle2 stream
559 560 def getchunks(self):
560 561 if self.ui.debugflag:
561 562 msg = ['bundle2-output-bundle: "%s",' % self._magicstring]
562 563 if self._params:
563 564 msg.append(' (%i params)' % len(self._params))
564 565 msg.append(' %i parts total\n' % len(self._parts))
565 566 self.ui.debug(''.join(msg))
566 567 outdebug(self.ui, 'start emission of %s stream' % self._magicstring)
567 568 yield self._magicstring
568 569 param = self._paramchunk()
569 570 outdebug(self.ui, 'bundle parameter: %s' % param)
570 571 yield _pack(_fstreamparamsize, len(param))
571 572 if param:
572 573 yield param
573 574 # starting compression
574 575 for chunk in self._getcorechunk():
575 576 yield self._compressor.compress(chunk)
576 577 yield self._compressor.flush()
577 578
578 579 def _paramchunk(self):
579 580 """return a encoded version of all stream parameters"""
580 581 blocks = []
581 582 for par, value in self._params:
582 583 par = urlreq.quote(par)
583 584 if value is not None:
584 585 value = urlreq.quote(value)
585 586 par = '%s=%s' % (par, value)
586 587 blocks.append(par)
587 588 return ' '.join(blocks)
588 589
589 590 def _getcorechunk(self):
590 591 """yield chunk for the core part of the bundle
591 592
592 593 (all but headers and parameters)"""
593 594 outdebug(self.ui, 'start of parts')
594 595 for part in self._parts:
595 596 outdebug(self.ui, 'bundle part: "%s"' % part.type)
596 597 for chunk in part.getchunks(ui=self.ui):
597 598 yield chunk
598 599 outdebug(self.ui, 'end of bundle')
599 600 yield _pack(_fpartheadersize, 0)
600 601
601 602
602 603 def salvageoutput(self):
603 604 """return a list with a copy of all output parts in the bundle
604 605
605 606 This is meant to be used during error handling to make sure we preserve
606 607 server output"""
607 608 salvaged = []
608 609 for part in self._parts:
609 610 if part.type.startswith('output'):
610 611 salvaged.append(part.copy())
611 612 return salvaged
612 613
613 614
614 615 class unpackermixin(object):
615 616 """A mixin to extract bytes and struct data from a stream"""
616 617
617 618 def __init__(self, fp):
618 619 self._fp = fp
619 620 self._seekable = (util.safehasattr(fp, 'seek') and
620 621 util.safehasattr(fp, 'tell'))
621 622
622 623 def _unpack(self, format):
623 624 """unpack this struct format from the stream"""
624 625 data = self._readexact(struct.calcsize(format))
625 626 return _unpack(format, data)
626 627
627 628 def _readexact(self, size):
628 629 """read exactly <size> bytes from the stream"""
629 630 return changegroup.readexactly(self._fp, size)
630 631
631 632 def seek(self, offset, whence=0):
632 633 """move the underlying file pointer"""
633 634 if self._seekable:
634 635 return self._fp.seek(offset, whence)
635 636 else:
636 637 raise NotImplementedError(_('File pointer is not seekable'))
637 638
638 639 def tell(self):
639 640 """return the file offset, or None if file is not seekable"""
640 641 if self._seekable:
641 642 try:
642 643 return self._fp.tell()
643 644 except IOError as e:
644 645 if e.errno == errno.ESPIPE:
645 646 self._seekable = False
646 647 else:
647 648 raise
648 649 return None
649 650
650 651 def close(self):
651 652 """close underlying file"""
652 653 if util.safehasattr(self._fp, 'close'):
653 654 return self._fp.close()
654 655
655 656 def getunbundler(ui, fp, magicstring=None):
656 657 """return a valid unbundler object for a given magicstring"""
657 658 if magicstring is None:
658 659 magicstring = changegroup.readexactly(fp, 4)
659 660 magic, version = magicstring[0:2], magicstring[2:4]
660 661 if magic != 'HG':
661 662 raise error.Abort(_('not a Mercurial bundle'))
662 663 unbundlerclass = formatmap.get(version)
663 664 if unbundlerclass is None:
664 665 raise error.Abort(_('unknown bundle version %s') % version)
665 666 unbundler = unbundlerclass(ui, fp)
666 667 indebug(ui, 'start processing of %s stream' % magicstring)
667 668 return unbundler
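
# A minimal usage sketch for the factory above (the ui object and file name
# are hypothetical; the stream must start with a valid magic string):
#
#     with open('payload.hg2', 'rb') as fp:
#         unbundler = getunbundler(ui, fp)
#         for part in unbundler.iterparts():
#             part.read() # consume the payload; parts must be fully read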
668 669
669 670 class unbundle20(unpackermixin):
670 671 """interpret a bundle2 stream
671 672
672 673 This class is fed with a binary stream and yields parts through its
674 675 `iterparts` method."""
674 675
675 676 _magicstring = 'HG20'
676 677
677 678 def __init__(self, ui, fp):
678 679 """If header is specified, we do not read it out of the stream."""
679 680 self.ui = ui
680 681 self._decompressor = util.decompressors[None]
681 682 self._compressed = None
682 683 super(unbundle20, self).__init__(fp)
683 684
684 685 @util.propertycache
685 686 def params(self):
686 687 """dictionary of stream level parameters"""
687 688 indebug(self.ui, 'reading bundle2 stream parameters')
688 689 params = {}
689 690 paramssize = self._unpack(_fstreamparamsize)[0]
690 691 if paramssize < 0:
691 692 raise error.BundleValueError('negative bundle param size: %i'
692 693 % paramssize)
693 694 if paramssize:
694 695 params = self._readexact(paramssize)
695 696 params = self._processallparams(params)
696 697 return params
697 698
698 699 def _processallparams(self, paramsblock):
699 700 """"""
700 701 params = util.sortdict()
701 702 for p in paramsblock.split(' '):
702 703 p = p.split('=', 1)
703 704 p = [urlreq.unquote(i) for i in p]
704 705 if len(p) < 2:
705 706 p.append(None)
706 707 self._processparam(*p)
707 708 params[p[0]] = p[1]
708 709 return params
709 710
710 711
711 712 def _processparam(self, name, value):
712 713 """process a parameter, applying its effect if needed
713 714
714 715 Parameters starting with a lower case letter are advisory and will be
715 716 ignored when unknown. Those starting with an upper case letter are
716 717 mandatory; this function raises BundleUnknownFeatureError when unknown.
717 718
718 719 Note: no options are currently supported. Any input will either be
719 720 ignored or fail.
720 721 """
721 722 if not name:
722 723 raise ValueError('empty parameter name')
723 724 if name[0] not in string.letters:
724 725 raise ValueError('non letter first character: %r' % name)
725 726 try:
726 727 handler = b2streamparamsmap[name.lower()]
727 728 except KeyError:
728 729 if name[0].islower():
729 730 indebug(self.ui, "ignoring unknown parameter %r" % name)
730 731 else:
731 732 raise error.BundleUnknownFeatureError(params=(name,))
732 733 else:
733 734 handler(self, name, value)
734 735
735 736 def _forwardchunks(self):
736 737 """utility to transfer a bundle2 as binary
737 738
738 739 This is made necessary by the fact the 'getbundle' command over 'ssh'
739 740 have no way to know then the reply end, relying on the bundle to be
740 741 interpreted to know its end. This is terrible and we are sorry, but we
741 742 needed to move forward to get general delta enabled.
742 743 """
743 744 yield self._magicstring
744 745 assert 'params' not in vars(self)
745 746 paramssize = self._unpack(_fstreamparamsize)[0]
746 747 if paramssize < 0:
747 748 raise error.BundleValueError('negative bundle param size: %i'
748 749 % paramssize)
749 750 yield _pack(_fstreamparamsize, paramssize)
750 751 if paramssize:
751 752 params = self._readexact(paramssize)
752 753 self._processallparams(params)
753 754 yield params
754 755 assert self._decompressor is util.decompressors[None]
755 756 # From there, payload might need to be decompressed
756 757 self._fp = self._decompressor(self._fp)
757 758 emptycount = 0
758 759 while emptycount < 2:
759 760 # so we can brainlessly loop
760 761 assert _fpartheadersize == _fpayloadsize
761 762 size = self._unpack(_fpartheadersize)[0]
762 763 yield _pack(_fpartheadersize, size)
763 764 if size:
764 765 emptycount = 0
765 766 else:
766 767 emptycount += 1
767 768 continue
768 769 if size == flaginterrupt:
769 770 continue
770 771 elif size < 0:
771 772 raise error.BundleValueError('negative chunk size: %i' % size)
772 773 yield self._readexact(size)
773 774
774 775
775 776 def iterparts(self):
776 777 """yield all parts contained in the stream"""
778 779 # make sure params have been loaded
778 779 self.params
780 781 # From there, the payload needs to be decompressed
780 781 self._fp = self._decompressor(self._fp)
781 782 indebug(self.ui, 'start extraction of bundle2 parts')
782 783 headerblock = self._readpartheader()
783 784 while headerblock is not None:
784 785 part = unbundlepart(self.ui, headerblock, self._fp)
785 786 yield part
786 787 part.seek(0, 2)
787 788 headerblock = self._readpartheader()
788 789 indebug(self.ui, 'end of bundle2 stream')
789 790
790 791 def _readpartheader(self):
791 792 """reads a part header size and return the bytes blob
792 793
793 794 returns None if empty"""
794 795 headersize = self._unpack(_fpartheadersize)[0]
795 796 if headersize < 0:
796 797 raise error.BundleValueError('negative part header size: %i'
797 798 % headersize)
798 799 indebug(self.ui, 'part header size: %i' % headersize)
799 800 if headersize:
800 801 return self._readexact(headersize)
801 802 return None
802 803
803 804 def compressed(self):
804 805 self.params # load params
805 806 return self._compressed
806 807
807 808 formatmap = {'20': unbundle20}
808 809
809 810 b2streamparamsmap = {}
810 811
811 812 def b2streamparamhandler(name):
812 813 """register a handler for a stream level parameter"""
813 814 def decorator(func):
814 815 assert name not in b2streamparamsmap
815 816 b2streamparamsmap[name] = func
816 817 return func
817 818 return decorator
818 819
819 820 @b2streamparamhandler('compression')
820 821 def processcompression(unbundler, param, value):
821 822 """read compression parameter and install payload decompression"""
822 823 if value not in util.decompressors:
823 824 raise error.BundleUnknownFeatureError(params=(param,),
824 825 values=(value,))
825 826 unbundler._decompressor = util.decompressors[value]
826 827 if value is not None:
827 828 unbundler._compressed = True
828 829
829 830 class bundlepart(object):
830 831 """A bundle2 part contains application level payload
831 832
832 833 The part `type` is used to route the part to the application level
833 834 handler.
834 835
835 836 The part payload is contained in ``part.data``. It could be raw bytes or a
836 837 generator of byte chunks.
837 838
838 839 You can add parameters to the part using the ``addparam`` method.
839 840 Parameters can be either mandatory (default) or advisory. Remote side
840 841 should be able to safely ignore the advisory ones.
841 842
842 843 Neither data nor parameters can be modified after generation has begun.
843 844 """
844 845
845 846 def __init__(self, parttype, mandatoryparams=(), advisoryparams=(),
846 847 data='', mandatory=True):
847 848 validateparttype(parttype)
848 849 self.id = None
849 850 self.type = parttype
850 851 self._data = data
851 852 self._mandatoryparams = list(mandatoryparams)
852 853 self._advisoryparams = list(advisoryparams)
853 854 # checking for duplicated entries
854 855 self._seenparams = set()
855 856 for pname, __ in self._mandatoryparams + self._advisoryparams:
856 857 if pname in self._seenparams:
857 858 raise RuntimeError('duplicated params: %s' % pname)
858 859 self._seenparams.add(pname)
859 860 # status of the part's generation:
860 861 # - None: not started,
861 862 # - False: currently being generated,
862 863 # - True: generation done.
863 864 self._generated = None
864 865 self.mandatory = mandatory
865 866
866 867 def copy(self):
867 868 """return a copy of the part
868 869
868 869 The new part has the very same content but no partid assigned yet.
870 871 Parts with generated data cannot be copied."""
871 872 assert not util.safehasattr(self.data, 'next')
872 873 return self.__class__(self.type, self._mandatoryparams,
873 874 self._advisoryparams, self._data, self.mandatory)
874 875
875 876 # methods used to define the part content
876 877 @property
877 878 def data(self):
878 879 return self._data
879 880
880 881 @data.setter
881 882 def data(self, data):
882 883 if self._generated is not None:
883 884 raise error.ReadOnlyPartError('part is being generated')
884 885 self._data = data
885 886
886 887 @property
887 888 def mandatoryparams(self):
888 889 # make it an immutable tuple to force people through ``addparam``
889 890 return tuple(self._mandatoryparams)
890 891
891 892 @property
892 893 def advisoryparams(self):
893 894 # make it an immutable tuple to force people through ``addparam``
894 895 return tuple(self._advisoryparams)
895 896
896 897 def addparam(self, name, value='', mandatory=True):
897 898 if self._generated is not None:
898 899 raise error.ReadOnlyPartError('part is being generated')
899 900 if name in self._seenparams:
900 901 raise ValueError('duplicated params: %s' % name)
901 902 self._seenparams.add(name)
902 903 params = self._advisoryparams
903 904 if mandatory:
904 905 params = self._mandatoryparams
905 906 params.append((name, value))
906 907
907 908 # methods used to generate the bundle2 stream
908 909 def getchunks(self, ui):
909 910 if self._generated is not None:
910 911 raise RuntimeError('part can only be consumed once')
911 912 self._generated = False
912 913
913 914 if ui.debugflag:
914 915 msg = ['bundle2-output-part: "%s"' % self.type]
915 916 if not self.mandatory:
916 917 msg.append(' (advisory)')
917 918 nbmp = len(self.mandatoryparams)
918 919 nbap = len(self.advisoryparams)
919 920 if nbmp or nbap:
920 921 msg.append(' (params:')
921 922 if nbmp:
922 923 msg.append(' %i mandatory' % nbmp)
923 924 if nbap:
923 924 msg.append(' %i advisory' % nbap)
925 926 msg.append(')')
926 927 if not self.data:
927 928 msg.append(' empty payload')
928 929 elif util.safehasattr(self.data, 'next'):
929 930 msg.append(' streamed payload')
930 931 else:
931 932 msg.append(' %i bytes payload' % len(self.data))
932 933 msg.append('\n')
933 934 ui.debug(''.join(msg))
934 935
935 936 #### header
936 937 if self.mandatory:
937 938 parttype = self.type.upper()
938 939 else:
939 940 parttype = self.type.lower()
940 941 outdebug(ui, 'part %s: "%s"' % (self.id, parttype))
941 942 ## parttype
942 943 header = [_pack(_fparttypesize, len(parttype)),
943 944 parttype, _pack(_fpartid, self.id),
944 945 ]
945 946 ## parameters
946 947 # count
947 948 manpar = self.mandatoryparams
948 949 advpar = self.advisoryparams
949 950 header.append(_pack(_fpartparamcount, len(manpar), len(advpar)))
950 951 # size
951 952 parsizes = []
952 953 for key, value in manpar:
953 954 parsizes.append(len(key))
954 955 parsizes.append(len(value))
955 956 for key, value in advpar:
956 957 parsizes.append(len(key))
957 958 parsizes.append(len(value))
958 959 paramsizes = _pack(_makefpartparamsizes(len(parsizes) // 2), *parsizes)
959 960 header.append(paramsizes)
960 961 # key, value
961 962 for key, value in manpar:
962 963 header.append(key)
963 964 header.append(value)
964 965 for key, value in advpar:
965 966 header.append(key)
966 967 header.append(value)
967 968 ## finalize header
968 969 headerchunk = ''.join(header)
969 970 outdebug(ui, 'header chunk size: %i' % len(headerchunk))
970 971 yield _pack(_fpartheadersize, len(headerchunk))
971 972 yield headerchunk
972 973 ## payload
973 974 try:
974 975 for chunk in self._payloadchunks():
975 976 outdebug(ui, 'payload chunk size: %i' % len(chunk))
976 977 yield _pack(_fpayloadsize, len(chunk))
977 978 yield chunk
978 979 except GeneratorExit:
979 980 # GeneratorExit means that nobody is listening for our
980 981 # results anyway, so just bail quickly rather than trying
981 982 # to produce an error part.
982 983 ui.debug('bundle2-generatorexit\n')
983 984 raise
984 985 except BaseException as exc:
985 986 # backup exception data for later
986 987 ui.debug('bundle2-input-stream-interrupt: encoding exception %s'
987 988 % exc)
988 989 exc_info = sys.exc_info()
989 990 msg = 'unexpected error: %s' % exc
990 991 interpart = bundlepart('error:abort', [('message', msg)],
991 992 mandatory=False)
992 993 interpart.id = 0
993 994 yield _pack(_fpayloadsize, -1)
994 995 for chunk in interpart.getchunks(ui=ui):
995 996 yield chunk
996 997 outdebug(ui, 'closing payload chunk')
997 998 # abort current part payload
998 999 yield _pack(_fpayloadsize, 0)
999 if sys.version_info[0] >= 3:
1000 if pycompat.ispy3:
1000 1001 raise exc_info[0](exc_info[1]).with_traceback(exc_info[2])
1001 1002 else:
1002 1003 exec("""raise exc_info[0], exc_info[1], exc_info[2]""")
1003 1004 # end of payload
1004 1005 outdebug(ui, 'closing payload chunk')
1005 1006 yield _pack(_fpayloadsize, 0)
1006 1007 self._generated = True
1007 1008
1008 1009 def _payloadchunks(self):
1009 1010 """yield chunks of a the part payload
1010 1011
1011 1012 Exists to handle the different methods to provide data to a part."""
1012 1013 # we only support fixed size data now.
1013 1014 # This will be improved in the future.
1014 1015 if util.safehasattr(self.data, 'next'):
1015 1016 buff = util.chunkbuffer(self.data)
1016 1017 chunk = buff.read(preferedchunksize)
1017 1018 while chunk:
1018 1019 yield chunk
1019 1020 chunk = buff.read(preferedchunksize)
1020 1021 elif len(self.data):
1021 1022 yield self.data
1022 1023
1023 1024
1024 1025 flaginterrupt = -1
1025 1026
1026 1027 class interrupthandler(unpackermixin):
1027 1028 """read one part and process it with restricted capability
1028 1029
1029 1030 This allows transmitting an exception raised on the producer side during
1030 1031 part iteration while the consumer is reading a part.
1031 1032
1032 1033 Parts processed in this manner only have access to a ui object."""
1033 1034
1034 1035 def __init__(self, ui, fp):
1035 1036 super(interrupthandler, self).__init__(fp)
1036 1037 self.ui = ui
1037 1038
1038 1039 def _readpartheader(self):
1039 1040 """reads a part header size and return the bytes blob
1040 1041
1041 1042 returns None if empty"""
1042 1043 headersize = self._unpack(_fpartheadersize)[0]
1043 1044 if headersize < 0:
1044 1045 raise error.BundleValueError('negative part header size: %i'
1045 1046 % headersize)
1046 1047 indebug(self.ui, 'part header size: %i' % headersize)
1047 1048 if headersize:
1048 1049 return self._readexact(headersize)
1049 1050 return None
1050 1051
1051 1052 def __call__(self):
1052 1053
1053 1054 self.ui.debug('bundle2-input-stream-interrupt:'
1054 1055 ' opening out of band context\n')
1055 1056 indebug(self.ui, 'bundle2 stream interruption, looking for a part.')
1056 1057 headerblock = self._readpartheader()
1057 1058 if headerblock is None:
1058 1059 indebug(self.ui, 'no part found during interruption.')
1059 1060 return
1060 1061 part = unbundlepart(self.ui, headerblock, self._fp)
1061 1062 op = interruptoperation(self.ui)
1062 1063 _processpart(op, part)
1063 1064 self.ui.debug('bundle2-input-stream-interrupt:'
1064 1065 ' closing out of band context\n')
1065 1066
1066 1067 class interruptoperation(object):
1067 1068 """A limited operation to be use by part handler during interruption
1068 1069
1069 1070 It only has access to a ui object.
1070 1071 """
1071 1072
1072 1073 def __init__(self, ui):
1073 1074 self.ui = ui
1074 1075 self.reply = None
1075 1076 self.captureoutput = False
1076 1077
1077 1078 @property
1078 1079 def repo(self):
1079 1080 raise RuntimeError('no repo access from stream interruption')
1080 1081
1081 1082 def gettransaction(self):
1082 1083 raise TransactionUnavailable('no repo access from stream interruption')
1083 1084
1084 1085 class unbundlepart(unpackermixin):
1085 1086 """a bundle part read from a bundle"""
1086 1087
1087 1088 def __init__(self, ui, header, fp):
1088 1089 super(unbundlepart, self).__init__(fp)
1089 1090 self.ui = ui
1090 1091 # unbundle state attr
1091 1092 self._headerdata = header
1092 1093 self._headeroffset = 0
1093 1094 self._initialized = False
1094 1095 self.consumed = False
1095 1096 # part data
1096 1097 self.id = None
1097 1098 self.type = None
1098 1099 self.mandatoryparams = None
1099 1100 self.advisoryparams = None
1100 1101 self.params = None
1101 1102 self.mandatorykeys = ()
1102 1103 self._payloadstream = None
1103 1104 self._readheader()
1104 1105 self._mandatory = None
1105 1106 self._chunkindex = [] # (payload, file) position tuples for chunk starts
1106 1107 self._pos = 0
1107 1108
1108 1109 def _fromheader(self, size):
1109 1110 """return the next <size> byte from the header"""
1110 1111 offset = self._headeroffset
1111 1112 data = self._headerdata[offset:(offset + size)]
1112 1113 self._headeroffset = offset + size
1113 1114 return data
1114 1115
1115 1116 def _unpackheader(self, format):
1116 1117 """read given format from header
1117 1118
1118 1119 This automatically computes the size of the format to read."""
1119 1120 data = self._fromheader(struct.calcsize(format))
1120 1121 return _unpack(format, data)
1121 1122
1122 1123 def _initparams(self, mandatoryparams, advisoryparams):
1123 1124 """internal function to setup all logic related parameters"""
1124 1125 # make it read only to prevent people touching it by mistake.
1125 1126 self.mandatoryparams = tuple(mandatoryparams)
1126 1127 self.advisoryparams = tuple(advisoryparams)
1127 1128 # user friendly UI
1128 1129 self.params = util.sortdict(self.mandatoryparams)
1129 1130 self.params.update(self.advisoryparams)
1130 1131 self.mandatorykeys = frozenset(p[0] for p in mandatoryparams)
1131 1132
1132 1133 def _payloadchunks(self, chunknum=0):
1133 1134 '''seek to specified chunk and start yielding data'''
1134 1135 if len(self._chunkindex) == 0:
1135 1136 assert chunknum == 0, 'Must start with chunk 0'
1136 1137 self._chunkindex.append((0, super(unbundlepart, self).tell()))
1137 1138 else:
1138 1139 assert chunknum < len(self._chunkindex), \
1139 1140 'Unknown chunk %d' % chunknum
1140 1141 super(unbundlepart, self).seek(self._chunkindex[chunknum][1])
1141 1142
1142 1143 pos = self._chunkindex[chunknum][0]
1143 1144 payloadsize = self._unpack(_fpayloadsize)[0]
1144 1145 indebug(self.ui, 'payload chunk size: %i' % payloadsize)
1145 1146 while payloadsize:
1146 1147 if payloadsize == flaginterrupt:
1147 1148 # interruption detection, the handler will now read a
1148 1149 # single part and process it.
1149 1150 interrupthandler(self.ui, self._fp)()
1150 1151 elif payloadsize < 0:
1151 1152 msg = 'negative payload chunk size: %i' % payloadsize
1152 1153 raise error.BundleValueError(msg)
1153 1154 else:
1154 1155 result = self._readexact(payloadsize)
1155 1156 chunknum += 1
1156 1157 pos += payloadsize
1157 1158 if chunknum == len(self._chunkindex):
1158 1159 self._chunkindex.append((pos,
1159 1160 super(unbundlepart, self).tell()))
1160 1161 yield result
1161 1162 payloadsize = self._unpack(_fpayloadsize)[0]
1162 1163 indebug(self.ui, 'payload chunk size: %i' % payloadsize)
1163 1164
1164 1165 def _findchunk(self, pos):
1165 1166 '''for a given payload position, return a chunk number and offset'''
1166 1167 for chunk, (ppos, fpos) in enumerate(self._chunkindex):
1167 1168 if ppos == pos:
1168 1169 return chunk, 0
1169 1170 elif ppos > pos:
1170 1171 return chunk - 1, pos - self._chunkindex[chunk - 1][0]
1171 1172 raise ValueError('Unknown chunk')
1172 1173
1173 1174 def _readheader(self):
1174 1175 """read the header and setup the object"""
1175 1176 typesize = self._unpackheader(_fparttypesize)[0]
1176 1177 self.type = self._fromheader(typesize)
1177 1178 indebug(self.ui, 'part type: "%s"' % self.type)
1178 1179 self.id = self._unpackheader(_fpartid)[0]
1179 1180 indebug(self.ui, 'part id: "%s"' % self.id)
1180 1181 # extract mandatory bit from type
1181 1182 self.mandatory = (self.type != self.type.lower())
1182 1183 self.type = self.type.lower()
1183 1184 ## reading parameters
1184 1185 # param count
1185 1186 mancount, advcount = self._unpackheader(_fpartparamcount)
1186 1187 indebug(self.ui, 'part parameters: %i' % (mancount + advcount))
1187 1188 # param size
1188 1189 fparamsizes = _makefpartparamsizes(mancount + advcount)
1189 1190 paramsizes = self._unpackheader(fparamsizes)
1190 1191 # make it a list of pairs again
1191 1192 paramsizes = zip(paramsizes[::2], paramsizes[1::2])
1192 1193 # split mandatory from advisory
1193 1194 mansizes = paramsizes[:mancount]
1194 1195 advsizes = paramsizes[mancount:]
1195 1196 # retrieve param value
1196 1197 manparams = []
1197 1198 for key, value in mansizes:
1198 1199 manparams.append((self._fromheader(key), self._fromheader(value)))
1199 1200 advparams = []
1200 1201 for key, value in advsizes:
1201 1202 advparams.append((self._fromheader(key), self._fromheader(value)))
1202 1203 self._initparams(manparams, advparams)
1203 1204 ## part payload
1204 1205 self._payloadstream = util.chunkbuffer(self._payloadchunks())
1205 1206 # the header has been read; mark the part as initialized
1206 1207 self._initialized = True
1207 1208
1208 1209 def read(self, size=None):
1209 1210 """read payload data"""
1210 1211 if not self._initialized:
1211 1212 self._readheader()
1212 1213 if size is None:
1213 1214 data = self._payloadstream.read()
1214 1215 else:
1215 1216 data = self._payloadstream.read(size)
1216 1217 self._pos += len(data)
1217 1218 if size is None or len(data) < size:
1218 1219 if not self.consumed and self._pos:
1219 1220 self.ui.debug('bundle2-input-part: total payload size %i\n'
1220 1221 % self._pos)
1221 1222 self.consumed = True
1222 1223 return data
1223 1224
1224 1225 def tell(self):
1225 1226 return self._pos
1226 1227
1227 1228 def seek(self, offset, whence=0):
1228 1229 if whence == 0:
1229 1230 newpos = offset
1230 1231 elif whence == 1:
1231 1232 newpos = self._pos + offset
1232 1233 elif whence == 2:
1233 1234 if not self.consumed:
1234 1235 self.read()
1235 1236 newpos = self._chunkindex[-1][0] - offset
1236 1237 else:
1237 1238 raise ValueError('Unknown whence value: %r' % (whence,))
1238 1239
1239 1240 if newpos > self._chunkindex[-1][0] and not self.consumed:
1240 1241 self.read()
1241 1242 if not 0 <= newpos <= self._chunkindex[-1][0]:
1242 1243 raise ValueError('Offset out of range')
1243 1244
1244 1245 if self._pos != newpos:
1245 1246 chunk, internaloffset = self._findchunk(newpos)
1246 1247 self._payloadstream = util.chunkbuffer(self._payloadchunks(chunk))
1247 1248 adjust = self.read(internaloffset)
1248 1249 if len(adjust) != internaloffset:
1249 1250 raise error.Abort(_('Seek failed\n'))
1250 1251 self._pos = newpos
1251 1252
1252 1253 # These are only the static capabilities.
1253 1254 # Check the 'getrepocaps' function for the rest.
1254 1255 capabilities = {'HG20': (),
1255 1256 'error': ('abort', 'unsupportedcontent', 'pushraced',
1256 1257 'pushkey'),
1257 1258 'listkeys': (),
1258 1259 'pushkey': (),
1259 1260 'digests': tuple(sorted(util.DIGESTS.keys())),
1260 1261 'remote-changegroup': ('http', 'https'),
1261 1262 'hgtagsfnodes': (),
1262 1263 }
1263 1264
1264 1265 def getrepocaps(repo, allowpushback=False):
1265 1266 """return the bundle2 capabilities for a given repo
1266 1267
1267 1268 Exists to allow extensions (like evolution) to mutate the capabilities.
1268 1269 """
1269 1270 caps = capabilities.copy()
1270 1271 caps['changegroup'] = tuple(sorted(
1271 1272 changegroup.supportedincomingversions(repo)))
1272 1273 if obsolete.isenabled(repo, obsolete.exchangeopt):
1273 1274 supportedformat = tuple('V%i' % v for v in obsolete.formats)
1274 1275 caps['obsmarkers'] = supportedformat
1275 1276 if allowpushback:
1276 1277 caps['pushback'] = ()
1277 1278 return caps
1278 1279
1279 1280 def bundle2caps(remote):
1280 1281 """return the bundle capabilities of a peer as dict"""
1281 1282 raw = remote.capable('bundle2')
1282 1283 if not raw and raw != '':
1283 1284 return {}
1284 1285 capsblob = urlreq.unquote(remote.capable('bundle2'))
1285 1286 return decodecaps(capsblob)
1286 1287
1287 1288 def obsmarkersversion(caps):
1288 1289 """extract the list of supported obsmarkers versions from a bundle2caps dict
1289 1290 """
1290 1291 obscaps = caps.get('obsmarkers', ())
1291 1292 return [int(c[1:]) for c in obscaps if c.startswith('V')]
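
# For example, obsmarkersversion({'obsmarkers': ('V0', 'V1')}) returns
# [0, 1], while a caps dict without an 'obsmarkers' entry yields [].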
1292 1293
1293 1294 def writebundle(ui, cg, filename, bundletype, vfs=None, compression=None):
1294 1295 """Write a bundle file and return its filename.
1295 1296
1296 1297 Existing files will not be overwritten.
1297 1298 If no filename is specified, a temporary file is created.
1298 1299 bz2 compression can be turned off.
1299 1300 The bundle file will be deleted in case of errors.
1300 1301 """
1301 1302
1302 1303 if bundletype == "HG20":
1303 1304 bundle = bundle20(ui)
1304 1305 bundle.setcompression(compression)
1305 1306 part = bundle.newpart('changegroup', data=cg.getchunks())
1306 1307 part.addparam('version', cg.version)
1307 1308 if 'clcount' in cg.extras:
1308 1309 part.addparam('nbchanges', str(cg.extras['clcount']),
1309 1310 mandatory=False)
1310 1311 chunkiter = bundle.getchunks()
1311 1312 else:
1312 1313 # compression argument is only for the bundle2 case
1313 1314 assert compression is None
1314 1315 if cg.version != '01':
1315 1316 raise error.Abort(_('old bundle types only support v1 '
1316 1317 'changegroups'))
1317 1318 header, comp = bundletypes[bundletype]
1318 1319 if comp not in util.compressors:
1319 1320 raise error.Abort(_('unknown stream compression type: %s')
1320 1321 % comp)
1321 1322 z = util.compressors[comp]()
1322 1323 subchunkiter = cg.getchunks()
1323 1324 def chunkiter():
1324 1325 yield header
1325 1326 for chunk in subchunkiter:
1326 1327 yield z.compress(chunk)
1327 1328 yield z.flush()
1328 1329 chunkiter = chunkiter()
1329 1330
1330 1331 # parse the changegroup data, otherwise we will block
1331 1332 # in case of sshrepo because we don't know the end of the stream
1332 1333 return changegroup.writechunks(ui, chunkiter, filename, vfs=vfs)
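
# A hedged usage sketch (ui and cg come from the caller; the file name is
# hypothetical):
#
#     fname = writebundle(ui, cg, 'dump.hg', 'HG20', compression='BZ')
#
# This writes a bundle2 file with a 'BZ' compressed changegroup part and
# returns the file name.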
1333 1334
1334 1335 @parthandler('changegroup', ('version', 'nbchanges', 'treemanifest'))
1335 1336 def handlechangegroup(op, inpart):
1336 1337 """apply a changegroup part on the repo
1337 1338
1338 1339 This is a very early implementation that will be massively reworked before
1339 1340 being inflicted on any end-user.
1340 1341 """
1341 1342 # Make sure we trigger a transaction creation
1342 1343 #
1343 1344 # The addchangegroup function will get a transaction object by itself, but
1344 1345 # we need to make sure we trigger the creation of a transaction object used
1345 1346 # for the whole processing scope.
1346 1347 op.gettransaction()
1347 1348 unpackerversion = inpart.params.get('version', '01')
1348 1349 # We should raise an appropriate exception here
1349 1350 cg = changegroup.getunbundler(unpackerversion, inpart, None)
1350 1351 # the source and url passed here are overwritten by the ones contained
1351 1352 # in the transaction.hookargs argument. So 'bundle2' is a placeholder
1352 1353 nbchangesets = None
1353 1354 if 'nbchanges' in inpart.params:
1354 1355 nbchangesets = int(inpart.params.get('nbchanges'))
1355 1356 if ('treemanifest' in inpart.params and
1356 1357 'treemanifest' not in op.repo.requirements):
1357 1358 if len(op.repo.changelog) != 0:
1358 1359 raise error.Abort(_(
1359 1360 "bundle contains tree manifests, but local repo is "
1360 1361 "non-empty and does not use tree manifests"))
1361 1362 op.repo.requirements.add('treemanifest')
1362 1363 op.repo._applyopenerreqs()
1363 1364 op.repo._writerequirements()
1364 1365 ret = cg.apply(op.repo, 'bundle2', 'bundle2', expectedtotal=nbchangesets)
1365 1366 op.records.add('changegroup', {'return': ret})
1366 1367 if op.reply is not None:
1367 1368 # This is definitely not the final form of this
1368 1369 # return. But one needs to start somewhere.
1369 1370 part = op.reply.newpart('reply:changegroup', mandatory=False)
1370 1371 part.addparam('in-reply-to', str(inpart.id), mandatory=False)
1371 1372 part.addparam('return', '%i' % ret, mandatory=False)
1372 1373 assert not inpart.read()
1373 1374
1374 1375 _remotechangegroupparams = tuple(['url', 'size', 'digests'] +
1375 1376 ['digest:%s' % k for k in util.DIGESTS.keys()])
1376 1377 @parthandler('remote-changegroup', _remotechangegroupparams)
1377 1378 def handleremotechangegroup(op, inpart):
1378 1379 """apply a bundle10 on the repo, given an url and validation information
1379 1380
1380 1381 All the information about the remote bundle to import is given as
1381 1382 parameters. The parameters include:
1382 1383 - url: the url to the bundle10.
1383 1384 - size: the bundle10 file size. It is used to validate what was
1384 1385 retrieved by the client matches the server knowledge about the bundle.
1385 1386 - digests: a space separated list of the digest types provided as
1386 1387 parameters.
1387 1388 - digest:<digest-type>: the hexadecimal representation of the digest with
1388 1389 that name. Like the size, it is used to validate what was retrieved by
1389 1390 the client matches what the server knows about the bundle.
1390 1391
1391 1392 When multiple digest types are given, all of them are checked.
1392 1393 """
1393 1394 try:
1394 1395 raw_url = inpart.params['url']
1395 1396 except KeyError:
1396 1397 raise error.Abort(_('remote-changegroup: missing "%s" param') % 'url')
1397 1398 parsed_url = util.url(raw_url)
1398 1399 if parsed_url.scheme not in capabilities['remote-changegroup']:
1399 1400 raise error.Abort(_('remote-changegroup does not support %s urls') %
1400 1401 parsed_url.scheme)
1401 1402
1402 1403 try:
1403 1404 size = int(inpart.params['size'])
1404 1405 except ValueError:
1405 1406 raise error.Abort(_('remote-changegroup: invalid value for param "%s"')
1406 1407 % 'size')
1407 1408 except KeyError:
1408 1409 raise error.Abort(_('remote-changegroup: missing "%s" param') % 'size')
1409 1410
1410 1411 digests = {}
1411 1412 for typ in inpart.params.get('digests', '').split():
1412 1413 param = 'digest:%s' % typ
1413 1414 try:
1414 1415 value = inpart.params[param]
1415 1416 except KeyError:
1416 1417 raise error.Abort(_('remote-changegroup: missing "%s" param') %
1417 1418 param)
1418 1419 digests[typ] = value
1419 1420
1420 1421 real_part = util.digestchecker(url.open(op.ui, raw_url), size, digests)
1421 1422
1422 1423 # Make sure we trigger a transaction creation
1423 1424 #
1424 1425 # The addchangegroup function will get a transaction object by itself, but
1425 1426 # we need to make sure we trigger the creation of a transaction object used
1426 1427 # for the whole processing scope.
1427 1428 op.gettransaction()
1428 1429 from . import exchange
1429 1430 cg = exchange.readbundle(op.repo.ui, real_part, raw_url)
1430 1431 if not isinstance(cg, changegroup.cg1unpacker):
1431 1432 raise error.Abort(_('%s: not a bundle version 1.0') %
1432 1433 util.hidepassword(raw_url))
1433 1434 ret = cg.apply(op.repo, 'bundle2', 'bundle2')
1434 1435 op.records.add('changegroup', {'return': ret})
1435 1436 if op.reply is not None:
1436 1437 # This is definitely not the final form of this
1437 1438 # return. But one needs to start somewhere.
1438 1439 part = op.reply.newpart('reply:changegroup')
1439 1440 part.addparam('in-reply-to', str(inpart.id), mandatory=False)
1440 1441 part.addparam('return', '%i' % ret, mandatory=False)
1441 1442 try:
1442 1443 real_part.validate()
1443 1444 except error.Abort as e:
1444 1445 raise error.Abort(_('bundle at %s is corrupted:\n%s') %
1445 1446 (util.hidepassword(raw_url), str(e)))
1446 1447 assert not inpart.read()
1447 1448
1448 1449 @parthandler('reply:changegroup', ('return', 'in-reply-to'))
1449 1450 def handlereplychangegroup(op, inpart):
1450 1451 ret = int(inpart.params['return'])
1451 1452 replyto = int(inpart.params['in-reply-to'])
1452 1453 op.records.add('changegroup', {'return': ret}, replyto)
1453 1454
1454 1455 @parthandler('check:heads')
1455 1456 def handlecheckheads(op, inpart):
1456 1457 """check that head of the repo did not change
1457 1458
1458 1459 This is used to detect a push race when using unbundle.
1459 1460 This replaces the "heads" argument of unbundle."""
1460 1461 h = inpart.read(20)
1461 1462 heads = []
1462 1463 while len(h) == 20:
1463 1464 heads.append(h)
1464 1465 h = inpart.read(20)
1465 1466 assert not h
1466 1467 # Trigger a transaction so that we are guaranteed to have the lock now.
1467 1468 if op.ui.configbool('experimental', 'bundle2lazylocking'):
1468 1469 op.gettransaction()
1469 1470 if sorted(heads) != sorted(op.repo.heads()):
1470 1471 raise error.PushRaced('repository changed while pushing - '
1471 1472 'please try again')
1472 1473
1473 1474 @parthandler('output')
1474 1475 def handleoutput(op, inpart):
1475 1476 """forward output captured on the server to the client"""
1476 1477 for line in inpart.read().splitlines():
1477 1478 op.ui.status(_('remote: %s\n') % line)
1478 1479
1479 1480 @parthandler('replycaps')
1480 1481 def handlereplycaps(op, inpart):
1481 1482 """Notify that a reply bundle should be created
1482 1483
1483 1484 The payload contains the capabilities information for the reply"""
1484 1485 caps = decodecaps(inpart.read())
1485 1486 if op.reply is None:
1486 1487 op.reply = bundle20(op.ui, caps)
1487 1488
1488 1489 class AbortFromPart(error.Abort):
1489 1490 """Sub-class of Abort that denotes an error from a bundle2 part."""
1490 1491
1491 1492 @parthandler('error:abort', ('message', 'hint'))
1492 1493 def handleerrorabort(op, inpart):
1493 1494 """Used to transmit abort error over the wire"""
1494 1495 raise AbortFromPart(inpart.params['message'],
1495 1496 hint=inpart.params.get('hint'))
1496 1497
1497 1498 @parthandler('error:pushkey', ('namespace', 'key', 'new', 'old', 'ret',
1498 1499 'in-reply-to'))
1499 1500 def handleerrorpushkey(op, inpart):
1500 1501 """Used to transmit failure of a mandatory pushkey over the wire"""
1501 1502 kwargs = {}
1502 1503 for name in ('namespace', 'key', 'new', 'old', 'ret'):
1503 1504 value = inpart.params.get(name)
1504 1505 if value is not None:
1505 1506 kwargs[name] = value
1506 1507 raise error.PushkeyFailed(inpart.params['in-reply-to'], **kwargs)
1507 1508
1508 1509 @parthandler('error:unsupportedcontent', ('parttype', 'params'))
1509 1510 def handleerrorunsupportedcontent(op, inpart):
1510 1511 """Used to transmit unknown content error over the wire"""
1511 1512 kwargs = {}
1512 1513 parttype = inpart.params.get('parttype')
1513 1514 if parttype is not None:
1514 1515 kwargs['parttype'] = parttype
1515 1516 params = inpart.params.get('params')
1516 1517 if params is not None:
1517 1518 kwargs['params'] = params.split('\0')
1518 1519
1519 1520 raise error.BundleUnknownFeatureError(**kwargs)
1520 1521
1521 1522 @parthandler('error:pushraced', ('message',))
1522 1523 def handleerrorpushraced(op, inpart):
1523 1524 """Used to transmit push race error over the wire"""
1524 1525 raise error.ResponseError(_('push failed:'), inpart.params['message'])
1525 1526
1526 1527 @parthandler('listkeys', ('namespace',))
1527 1528 def handlelistkeys(op, inpart):
1528 1529 """retrieve pushkey namespace content stored in a bundle2"""
1529 1530 namespace = inpart.params['namespace']
1530 1531 r = pushkey.decodekeys(inpart.read())
1531 1532 op.records.add('listkeys', (namespace, r))
1532 1533
1533 1534 @parthandler('pushkey', ('namespace', 'key', 'old', 'new'))
1534 1535 def handlepushkey(op, inpart):
1535 1536 """process a pushkey request"""
1536 1537 dec = pushkey.decode
1537 1538 namespace = dec(inpart.params['namespace'])
1538 1539 key = dec(inpart.params['key'])
1539 1540 old = dec(inpart.params['old'])
1540 1541 new = dec(inpart.params['new'])
1541 1542 # Grab the transaction to ensure that we have the lock before performing the
1542 1543 # pushkey.
1543 1544 if op.ui.configbool('experimental', 'bundle2lazylocking'):
1544 1545 op.gettransaction()
1545 1546 ret = op.repo.pushkey(namespace, key, old, new)
1546 1547 record = {'namespace': namespace,
1547 1548 'key': key,
1548 1549 'old': old,
1549 1550 'new': new}
1550 1551 op.records.add('pushkey', record)
1551 1552 if op.reply is not None:
1552 1553 rpart = op.reply.newpart('reply:pushkey')
1553 1554 rpart.addparam('in-reply-to', str(inpart.id), mandatory=False)
1554 1555 rpart.addparam('return', '%i' % ret, mandatory=False)
1555 1556 if inpart.mandatory and not ret:
1556 1557 kwargs = {}
1557 1558 for key in ('namespace', 'key', 'new', 'old', 'ret'):
1558 1559 if key in inpart.params:
1559 1560 kwargs[key] = inpart.params[key]
1560 1561 raise error.PushkeyFailed(partid=str(inpart.id), **kwargs)
1561 1562
1562 1563 @parthandler('reply:pushkey', ('return', 'in-reply-to'))
1563 1564 def handlepushkeyreply(op, inpart):
1564 1565 """retrieve the result of a pushkey request"""
1565 1566 ret = int(inpart.params['return'])
1566 1567 partid = int(inpart.params['in-reply-to'])
1567 1568 op.records.add('pushkey', {'return': ret}, partid)
1568 1569
1569 1570 @parthandler('obsmarkers')
1570 1571 def handleobsmarker(op, inpart):
1571 1572 """add a stream of obsmarkers to the repo"""
1572 1573 tr = op.gettransaction()
1573 1574 markerdata = inpart.read()
1574 1575 if op.ui.config('experimental', 'obsmarkers-exchange-debug', False):
1575 1576 op.ui.write(('obsmarker-exchange: %i bytes received\n')
1576 1577 % len(markerdata))
1577 1578 # The mergemarkers call will crash if marker creation is not enabled.
1578 1579 # We want to avoid this if the part is advisory.
1579 1580 if not inpart.mandatory and op.repo.obsstore.readonly:
1580 1581 op.repo.ui.debug('ignoring obsolescence markers, feature not enabled\n')
1581 1582 return
1582 1583 new = op.repo.obsstore.mergemarkers(tr, markerdata)
1583 1584 if new:
1584 1585 op.repo.ui.status(_('%i new obsolescence markers\n') % new)
1585 1586 op.records.add('obsmarkers', {'new': new})
1586 1587 if op.reply is not None:
1587 1588 rpart = op.reply.newpart('reply:obsmarkers')
1588 1589 rpart.addparam('in-reply-to', str(inpart.id), mandatory=False)
1589 1590 rpart.addparam('new', '%i' % new, mandatory=False)
1590 1591
1591 1592
1592 1593 @parthandler('reply:obsmarkers', ('new', 'in-reply-to'))
1593 1594 def handleobsmarkerreply(op, inpart):
1594 1595 """retrieve the result of a pushkey request"""
1595 1596 ret = int(inpart.params['new'])
1596 1597 partid = int(inpart.params['in-reply-to'])
1597 1598 op.records.add('obsmarkers', {'new': ret}, partid)
1598 1599
1599 1600 @parthandler('hgtagsfnodes')
1600 1601 def handlehgtagsfnodes(op, inpart):
1601 1602 """Applies .hgtags fnodes cache entries to the local repo.
1602 1603
1603 1604 Payload is pairs of 20 byte changeset nodes and filenodes.
1604 1605 """
1605 1606 # Grab the transaction so we ensure that we have the lock at this point.
1606 1607 if op.ui.configbool('experimental', 'bundle2lazylocking'):
1607 1608 op.gettransaction()
1608 1609 cache = tags.hgtagsfnodescache(op.repo.unfiltered())
1609 1610
1610 1611 count = 0
1611 1612 while True:
1612 1613 node = inpart.read(20)
1613 1614 fnode = inpart.read(20)
1614 1615 if len(node) < 20 or len(fnode) < 20:
1615 1616 op.ui.debug('ignoring incomplete received .hgtags fnodes data\n')
1616 1617 break
1617 1618 cache.setfnode(node, fnode)
1618 1619 count += 1
1619 1620
1620 1621 cache.write()
1621 1622 op.ui.debug('applied %i hgtags fnodes cache entries\n' % count)
@@ -1,579 +1,579 b''
1 1 # encoding.py - character transcoding support for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import locale
12 12 import os
13 import sys
14 13 import unicodedata
15 14
16 15 from . import (
17 16 error,
17 pycompat,
18 18 )
19 19
20 if sys.version_info[0] >= 3:
20 if pycompat.ispy3:
21 21 unichr = chr
22 22
23 23 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
24 24 # "Unicode Subtleties"), so we need to ignore them in some places for
25 25 # sanity.
26 26 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in
27 27 "200c 200d 200e 200f 202a 202b 202c 202d 202e "
28 28 "206a 206b 206c 206d 206e 206f feff".split()]
29 29 # verify the next function will work
30 if sys.version_info[0] >= 3:
30 if pycompat.ispy3:
31 31 assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')])
32 32 else:
33 33 assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"])
34 34
35 35 def hfsignoreclean(s):
36 36 """Remove codepoints ignored by HFS+ from s.
37 37
38 38 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
39 39 '.hg'
40 40 >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
41 41 '.hg'
42 42 """
43 43 if "\xe2" in s or "\xef" in s:
44 44 for c in _ignore:
45 45 s = s.replace(c, '')
46 46 return s
47 47
48 48 def _getpreferredencoding():
49 49 '''
50 50 On darwin, getpreferredencoding ignores the locale environment and
51 51 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
52 52 for Python 2.7 and up. This is the same corrected code for earlier
53 53 Python versions.
54 54
55 55 However, we can't use a version check for this method, as some distributions
56 56 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
57 57 encoding, as it is unlikely that this encoding is the one actually expected.
58 58 '''
59 59 try:
60 60 locale.CODESET
61 61 except AttributeError:
62 62 # Fall back to parsing environment variables :-(
63 63 return locale.getdefaultlocale()[1]
64 64
65 65 oldloc = locale.setlocale(locale.LC_CTYPE)
66 66 locale.setlocale(locale.LC_CTYPE, "")
67 67 result = locale.nl_langinfo(locale.CODESET)
68 68 locale.setlocale(locale.LC_CTYPE, oldloc)
69 69
70 70 return result
71 71
72 72 _encodingfixers = {
73 73 '646': lambda: 'ascii',
74 74 'ANSI_X3.4-1968': lambda: 'ascii',
75 75 'mac-roman': _getpreferredencoding
76 76 }
77 77
78 78 try:
79 79 encoding = os.environ.get("HGENCODING")
80 80 if not encoding:
81 81 encoding = locale.getpreferredencoding() or 'ascii'
82 82 encoding = _encodingfixers.get(encoding, lambda: encoding)()
83 83 except locale.Error:
84 84 encoding = 'ascii'
85 85 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
86 86 fallbackencoding = 'ISO-8859-1'
87 87
88 88 class localstr(str):
89 89 '''This class allows strings that are unmodified to be
90 90 round-tripped to the local encoding and back'''
91 91 def __new__(cls, u, l):
92 92 s = str.__new__(cls, l)
93 93 s._utf8 = u
94 94 return s
95 95 def __hash__(self):
96 96 return hash(self._utf8) # avoid collisions in local string space
97 97
98 98 def tolocal(s):
99 99 """
100 100 Convert a string from internal UTF-8 to local encoding
101 101
102 102 All internal strings should be UTF-8 but some repos before the
103 103 implementation of locale support may contain latin1 or possibly
104 104 other character sets. We attempt to decode everything strictly
105 105 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
106 106 replace unknown characters.
107 107
108 108 The localstr class is used to cache the known UTF-8 encoding of
109 109 strings next to their local representation to allow lossless
110 110 round-trip conversion back to UTF-8.
111 111
112 112 >>> u = 'foo: \\xc3\\xa4' # utf-8
113 113 >>> l = tolocal(u)
114 114 >>> l
115 115 'foo: ?'
116 116 >>> fromlocal(l)
117 117 'foo: \\xc3\\xa4'
118 118 >>> u2 = 'foo: \\xc3\\xa1'
119 119 >>> d = { l: 1, tolocal(u2): 2 }
120 120 >>> len(d) # no collision
121 121 2
122 122 >>> 'foo: ?' in d
123 123 False
124 124 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
125 125 >>> l = tolocal(l1)
126 126 >>> l
127 127 'foo: ?'
128 128 >>> fromlocal(l) # magically in utf-8
129 129 'foo: \\xc3\\xa4'
130 130 """
131 131
132 132 try:
133 133 try:
134 134 # make sure string is actually stored in UTF-8
135 135 u = s.decode('UTF-8')
136 136 if encoding == 'UTF-8':
137 137 # fast path
138 138 return s
139 139 r = u.encode(encoding, "replace")
140 140 if u == r.decode(encoding):
141 141 # r is a safe, non-lossy encoding of s
142 142 return r
143 143 return localstr(s, r)
144 144 except UnicodeDecodeError:
145 145 # we should only get here if we're looking at an ancient changeset
146 146 try:
147 147 u = s.decode(fallbackencoding)
148 148 r = u.encode(encoding, "replace")
149 149 if u == r.decode(encoding):
150 150 # r is a safe, non-lossy encoding of s
151 151 return r
152 152 return localstr(u.encode('UTF-8'), r)
153 153 except UnicodeDecodeError:
154 154 u = s.decode("utf-8", "replace") # last ditch
155 155 return u.encode(encoding, "replace") # can't round-trip
156 156 except LookupError as k:
157 157 raise error.Abort(k, hint="please check your locale settings")
158 158
159 159 def fromlocal(s):
160 160 """
161 161 Convert a string from the local character encoding to UTF-8
162 162
163 163 We attempt to decode strings using the encoding mode set by
164 164 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
165 165 characters will cause an error message. Other modes include
166 166 'replace', which replaces unknown characters with a special
167 167 Unicode character, and 'ignore', which drops the character.
168 168 """
169 169
170 170 # can we do a lossless round-trip?
171 171 if isinstance(s, localstr):
172 172 return s._utf8
173 173
174 174 try:
175 175 return s.decode(encoding, encodingmode).encode("utf-8")
176 176 except UnicodeDecodeError as inst:
177 177 sub = s[max(0, inst.start - 10):inst.start + 10]
178 178 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
179 179 except LookupError as k:
180 180 raise error.Abort(k, hint="please check your locale settings")
181 181
182 182 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
183 183 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
184 184 and "WFA" or "WF")
185 185
186 186 def colwidth(s):
187 187 "Find the column width of a string for display in the local encoding"
188 188 return ucolwidth(s.decode(encoding, 'replace'))
189 189
190 190 def ucolwidth(d):
191 191 "Find the column width of a Unicode string for display"
192 192 eaw = getattr(unicodedata, 'east_asian_width', None)
193 193 if eaw is not None:
194 194 return sum([eaw(c) in wide and 2 or 1 for c in d])
195 195 return len(d)
196 196
197 197 def getcols(s, start, c):
198 198 '''Use colwidth to find a c-column substring of s starting at byte
199 199 index start'''
200 200 for x in xrange(start + c, len(s)):
201 201 t = s[start:x]
202 202 if colwidth(t) == c:
203 203 return t
204 204
205 205 def trim(s, width, ellipsis='', leftside=False):
206 206 """Trim string 's' to at most 'width' columns (including 'ellipsis').
207 207
208 208 If 'leftside' is True, left side of string 's' is trimmed.
209 209 'ellipsis' is always placed at trimmed side.
210 210
211 211 >>> ellipsis = '+++'
212 212 >>> from . import encoding
213 213 >>> encoding.encoding = 'utf-8'
214 214 >>> t= '1234567890'
215 215 >>> print trim(t, 12, ellipsis=ellipsis)
216 216 1234567890
217 217 >>> print trim(t, 10, ellipsis=ellipsis)
218 218 1234567890
219 219 >>> print trim(t, 8, ellipsis=ellipsis)
220 220 12345+++
221 221 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
222 222 +++67890
223 223 >>> print trim(t, 8)
224 224 12345678
225 225 >>> print trim(t, 8, leftside=True)
226 226 34567890
227 227 >>> print trim(t, 3, ellipsis=ellipsis)
228 228 +++
229 229 >>> print trim(t, 1, ellipsis=ellipsis)
230 230 +
231 231 >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
232 232 >>> t = u.encode(encoding.encoding)
233 233 >>> print trim(t, 12, ellipsis=ellipsis)
234 234 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
235 235 >>> print trim(t, 10, ellipsis=ellipsis)
236 236 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
237 237 >>> print trim(t, 8, ellipsis=ellipsis)
238 238 \xe3\x81\x82\xe3\x81\x84+++
239 239 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
240 240 +++\xe3\x81\x88\xe3\x81\x8a
241 241 >>> print trim(t, 5)
242 242 \xe3\x81\x82\xe3\x81\x84
243 243 >>> print trim(t, 5, leftside=True)
244 244 \xe3\x81\x88\xe3\x81\x8a
245 245 >>> print trim(t, 4, ellipsis=ellipsis)
246 246 +++
247 247 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
248 248 +++
249 249 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
250 250 >>> print trim(t, 12, ellipsis=ellipsis)
251 251 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
252 252 >>> print trim(t, 10, ellipsis=ellipsis)
253 253 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
254 254 >>> print trim(t, 8, ellipsis=ellipsis)
255 255 \x11\x22\x33\x44\x55+++
256 256 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
257 257 +++\x66\x77\x88\x99\xaa
258 258 >>> print trim(t, 8)
259 259 \x11\x22\x33\x44\x55\x66\x77\x88
260 260 >>> print trim(t, 8, leftside=True)
261 261 \x33\x44\x55\x66\x77\x88\x99\xaa
262 262 >>> print trim(t, 3, ellipsis=ellipsis)
263 263 +++
264 264 >>> print trim(t, 1, ellipsis=ellipsis)
265 265 +
266 266 """
267 267 try:
268 268 u = s.decode(encoding)
269 269 except UnicodeDecodeError:
270 270 if len(s) <= width: # trimming is not needed
271 271 return s
272 272 width -= len(ellipsis)
273 273 if width <= 0: # not enough room even for ellipsis
274 274 return ellipsis[:width + len(ellipsis)]
275 275 if leftside:
276 276 return ellipsis + s[-width:]
277 277 return s[:width] + ellipsis
278 278
279 279 if ucolwidth(u) <= width: # trimming is not needed
280 280 return s
281 281
282 282 width -= len(ellipsis)
283 283 if width <= 0: # not enough room even for ellipsis
284 284 return ellipsis[:width + len(ellipsis)]
285 285
286 286 if leftside:
287 287 uslice = lambda i: u[i:]
288 288 concat = lambda s: ellipsis + s
289 289 else:
290 290 uslice = lambda i: u[:-i]
291 291 concat = lambda s: s + ellipsis
292 292 for i in xrange(1, len(u)):
293 293 usub = uslice(i)
294 294 if ucolwidth(usub) <= width:
295 295 return concat(usub.encode(encoding))
296 296 return ellipsis # not enough room for multi-column characters
297 297
298 298 def _asciilower(s):
299 299 '''convert a string to lowercase if ASCII
300 300
301 301 Raises UnicodeDecodeError if non-ASCII characters are found.'''
302 302 s.decode('ascii')
303 303 return s.lower()
304 304
305 305 def asciilower(s):
306 306 # delay importing avoids cyclic dependency around "parsers" in
307 307 # pure Python build (util => i18n => encoding => parsers => util)
308 308 from . import parsers
309 309 impl = getattr(parsers, 'asciilower', _asciilower)
310 310 global asciilower
311 311 asciilower = impl
312 312 return impl(s)
313 313
314 314 def _asciiupper(s):
315 315 '''convert a string to uppercase if ASCII
316 316
317 317 Raises UnicodeDecodeError if non-ASCII characters are found.'''
318 318 s.decode('ascii')
319 319 return s.upper()
320 320
321 321 def asciiupper(s):
322 322 # delay importing avoids cyclic dependency around "parsers" in
323 323 # pure Python build (util => i18n => encoding => parsers => util)
324 324 from . import parsers
325 325 impl = getattr(parsers, 'asciiupper', _asciiupper)
326 326 global asciiupper
327 327 asciiupper = impl
328 328 return impl(s)
329 329
330 330 def lower(s):
331 331 "best-effort encoding-aware case-folding of local string s"
332 332 try:
333 333 return asciilower(s)
334 334 except UnicodeDecodeError:
335 335 pass
336 336 try:
337 337 if isinstance(s, localstr):
338 338 u = s._utf8.decode("utf-8")
339 339 else:
340 340 u = s.decode(encoding, encodingmode)
341 341
342 342 lu = u.lower()
343 343 if u == lu:
344 344 return s # preserve localstring
345 345 return lu.encode(encoding)
346 346 except UnicodeError:
347 347 return s.lower() # we don't know how to fold this except in ASCII
348 348 except LookupError as k:
349 349 raise error.Abort(k, hint="please check your locale settings")
350 350
351 351 def upper(s):
352 352 "best-effort encoding-aware case-folding of local string s"
353 353 try:
354 354 return asciiupper(s)
355 355 except UnicodeDecodeError:
356 356 return upperfallback(s)
357 357
358 358 def upperfallback(s):
359 359 try:
360 360 if isinstance(s, localstr):
361 361 u = s._utf8.decode("utf-8")
362 362 else:
363 363 u = s.decode(encoding, encodingmode)
364 364
365 365 uu = u.upper()
366 366 if u == uu:
367 367 return s # preserve localstring
368 368 return uu.encode(encoding)
369 369 except UnicodeError:
370 370 return s.upper() # we don't know how to fold this except in ASCII
371 371 except LookupError as k:
372 372 raise error.Abort(k, hint="please check your locale settings")
373 373
374 374 class normcasespecs(object):
375 375 '''what a platform's normcase does to ASCII strings
376 376
377 377 This is specified per platform, and should be consistent with what normcase
378 378 on that platform actually does.
379 379
380 380 lower: normcase lowercases ASCII strings
381 381 upper: normcase uppercases ASCII strings
382 382 other: the fallback function should always be called
383 383
384 384 This should be kept in sync with normcase_spec in util.h.'''
385 385 lower = -1
386 386 upper = 1
387 387 other = 0
388 388
389 389 _jsonmap = []
390 390 _jsonmap.extend("\\u%04x" % x for x in range(32))
391 391 _jsonmap.extend(chr(x) for x in range(32, 127))
392 392 _jsonmap.append('\\u007f')
393 393 _jsonmap[0x09] = '\\t'
394 394 _jsonmap[0x0a] = '\\n'
395 395 _jsonmap[0x22] = '\\"'
396 396 _jsonmap[0x5c] = '\\\\'
397 397 _jsonmap[0x08] = '\\b'
398 398 _jsonmap[0x0c] = '\\f'
399 399 _jsonmap[0x0d] = '\\r'
400 400 _paranoidjsonmap = _jsonmap[:]
401 401 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
402 402 _paranoidjsonmap[0x3e] = '\\u003e' # '>'
403 403 _jsonmap.extend(chr(x) for x in range(128, 256))
404 404
405 405 def jsonescape(s, paranoid=False):
406 406 '''returns a string suitable for JSON
407 407
408 408 JSON is problematic for us because it doesn't support non-Unicode
409 409 bytes. To deal with this, we take the following approach:
410 410
411 411 - localstr objects are converted back to UTF-8
412 412 - valid UTF-8/ASCII strings are passed as-is
413 413 - other strings are converted to UTF-8b surrogate encoding
414 414 - apply JSON-specified string escaping
415 415
416 416 (escapes are doubled in these tests)
417 417
418 418 >>> jsonescape('this is a test')
419 419 'this is a test'
420 420 >>> jsonescape('escape characters: \\0 \\x0b \\x7f')
421 421 'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
422 422 >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
423 423 'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
424 424 >>> jsonescape('a weird byte: \\xdd')
425 425 'a weird byte: \\xed\\xb3\\x9d'
426 426 >>> jsonescape('utf-8: caf\\xc3\\xa9')
427 427 'utf-8: caf\\xc3\\xa9'
428 428 >>> jsonescape('')
429 429 ''
430 430
431 431 If paranoid, non-ascii and common troublesome characters are also escaped.
432 432 This is suitable for web output.
433 433
434 434 >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
435 435 'escape boundary: ~ \\\\u007f \\\\u0080'
436 436 >>> jsonescape('a weird byte: \\xdd', paranoid=True)
437 437 'a weird byte: \\\\udcdd'
438 438 >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True)
439 439 'utf-8: caf\\\\u00e9'
440 440 >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
441 441 'non-BMP: \\\\ud834\\\\udd1e'
442 442 >>> jsonescape('<foo@example.org>', paranoid=True)
443 443 '\\\\u003cfoo@example.org\\\\u003e'
444 444 '''
445 445
446 446 if paranoid:
447 447 jm = _paranoidjsonmap
448 448 else:
449 449 jm = _jsonmap
450 450
451 451 u8chars = toutf8b(s)
452 452 try:
453 453 return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
454 454 except IndexError:
455 455 pass
456 456 # non-BMP char is represented as UTF-16 surrogate pair
457 457 u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
458 458 u16codes.pop(0) # drop BOM
459 459 return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
460 460
461 461 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
462 462
463 463 def getutf8char(s, pos):
464 464 '''get the next full utf-8 character in the given string, starting at pos
465 465
466 466 Raises a UnicodeError if the given location does not start a valid
467 467 utf-8 character.
468 468 '''
469 469
470 470 # find how many bytes to attempt decoding from first nibble
471 471 l = _utf8len[ord(s[pos]) >> 4]
472 472 if not l: # ascii
473 473 return s[pos]
474 474
475 475 c = s[pos:pos + l]
476 476 # validate with attempted decode
477 477 c.decode("utf-8")
478 478 return c
479 479
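# Worked example (not part of the changeset), using Python 2 byte-string
# semantics as in this module: the sequence length is derived from the high
# nibble of the first byte. For '\xe3\x81\x82' (U+3042 in UTF-8) the first
# byte 0xe3 has high nibble 0xe, _utf8len[0xe] == 3, so three bytes are
# sliced out and validated by the attempted decode.
s = '\xe3\x81\x82'
assert getutf8char(s, 0) == s
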
480 480 def toutf8b(s):
481 481 '''convert a local, possibly-binary string into UTF-8b
482 482
483 483 This is intended as a generic method to preserve data when working
484 484 with schemes like JSON and XML that have no provision for
485 485 arbitrary byte strings. As Mercurial often doesn't know
486 486 what encoding data is in, we use so-called UTF-8b.
487 487
488 488 If a string is already valid UTF-8 (or ASCII), it passes unmodified.
489 489 Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
490 490 uDC00-uDCFF.
491 491
492 492 Principles of operation:
493 493
494 494 - ASCII and UTF-8 data successfully round-trips and is understood
495 495 by Unicode-oriented clients
496 496 - filenames and file contents in arbitrary other encodings can
497 497 be round-tripped or recovered by clueful clients
498 498 - local strings that have a cached known UTF-8 encoding (aka
499 499 localstr) get sent as UTF-8 so Unicode-oriented clients get the
500 500 Unicode data they want
501 501 - because we must preserve UTF-8 bytestring in places such as
502 502 filenames, metadata can't be roundtripped without help
503 503
504 504 (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
505 505 arbitrary bytes into an internal Unicode format that can be
506 506 re-encoded back into the original. Here we are exposing the
507 507 internal surrogate encoding as a UTF-8 string.)
508 508 '''
509 509
510 510 if "\xed" not in s:
511 511 if isinstance(s, localstr):
512 512 return s._utf8
513 513 try:
514 514 s.decode('utf-8')
515 515 return s
516 516 except UnicodeDecodeError:
517 517 pass
518 518
519 519 r = ""
520 520 pos = 0
521 521 l = len(s)
522 522 while pos < l:
523 523 try:
524 524 c = getutf8char(s, pos)
525 525 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
526 526 # have to re-escape existing U+DCxx characters
527 527 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
528 528 pos += 1
529 529 else:
530 530 pos += len(c)
531 531 except UnicodeDecodeError:
532 532 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
533 533 pos += 1
534 534 r += c
535 535 return r
536 536
537 537 def fromutf8b(s):
538 538 '''Given a UTF-8b string, return a local, possibly-binary string.
539 539
540 540 The conversion returns the original binary string. This
541 541 is a round-trip process for strings like filenames, but metadata
542 542 that was passed through tolocal will remain in UTF-8.
543 543
544 544 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
545 545 >>> m = "\\xc3\\xa9\\x99abcd"
546 546 >>> toutf8b(m)
547 547 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
548 548 >>> roundtrip(m)
549 549 True
550 550 >>> roundtrip("\\xc2\\xc2\\x80")
551 551 True
552 552 >>> roundtrip("\\xef\\xbf\\xbd")
553 553 True
554 554 >>> roundtrip("\\xef\\xef\\xbf\\xbd")
555 555 True
556 556 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80")
557 557 True
558 558 '''
559 559
560 560 # fast path - look for uDxxx prefixes in s
561 561 if "\xed" not in s:
562 562 return s
563 563
564 564 # We could do this with the unicode type but some Python builds
565 565 # use UTF-16 internally (issue5031) which causes non-BMP code
566 566 # points to be escaped. Instead, we use our handy getutf8char
567 567 # helper again to walk the string without "decoding" it.
568 568
569 569 r = ""
570 570 pos = 0
571 571 l = len(s)
572 572 while pos < l:
573 573 c = getutf8char(s, pos)
574 574 pos += len(c)
575 575 # unescape U+DCxx characters
576 576 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
577 577 c = chr(ord(c.decode("utf-8")) & 0xff)
578 578 r += c
579 579 return r
@@ -1,166 +1,168 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import sys
14 14
15 if sys.version_info[0] < 3:
15 ispy3 = (sys.version_info[0] >= 3)
16
17 if not ispy3:
16 18 import cPickle as pickle
17 19 import cStringIO as io
18 20 import httplib
19 21 import Queue as _queue
20 22 import SocketServer as socketserver
21 23 import urlparse
22 24 import xmlrpclib
23 25 else:
24 26 import http.client as httplib
25 27 import io
26 28 import pickle
27 29 import queue as _queue
28 30 import socketserver
29 31 import urllib.parse as urlparse
30 32 import xmlrpc.client as xmlrpclib
31 33
32 if sys.version_info[0] >= 3:
34 if ispy3:
33 35 import builtins
34 36 import functools
35 37
36 38 def _wrapattrfunc(f):
37 39 @functools.wraps(f)
38 40 def w(object, name, *args):
39 41 if isinstance(name, bytes):
40 42 name = name.decode(u'utf-8')
41 43 return f(object, name, *args)
42 44 return w
43 45
44 46 # these wrappers are automagically imported by hgloader
45 47 delattr = _wrapattrfunc(builtins.delattr)
46 48 getattr = _wrapattrfunc(builtins.getattr)
47 49 hasattr = _wrapattrfunc(builtins.hasattr)
48 50 setattr = _wrapattrfunc(builtins.setattr)
49 51 xrange = builtins.range
50 52
51 53 stringio = io.StringIO
52 54 empty = _queue.Empty
53 55 queue = _queue.Queue
54 56
55 57 class _pycompatstub(object):
56 58 def __init__(self):
57 59 self._aliases = {}
58 60
59 61 def _registeraliases(self, origin, items):
60 62 """Add items that will be populated at the first access"""
61 63 self._aliases.update((item.replace('_', '').lower(), (origin, item))
62 64 for item in items)
63 65
64 66 def __getattr__(self, name):
65 67 try:
66 68 origin, item = self._aliases[name]
67 69 except KeyError:
68 70 raise AttributeError(name)
69 71 self.__dict__[name] = obj = getattr(origin, item)
70 72 return obj
71 73
72 74 httpserver = _pycompatstub()
73 75 urlreq = _pycompatstub()
74 76 urlerr = _pycompatstub()
75 if sys.version_info[0] < 3:
77 if not ispy3:
76 78 import BaseHTTPServer
77 79 import CGIHTTPServer
78 80 import SimpleHTTPServer
79 81 import urllib2
80 82 import urllib
81 83 urlreq._registeraliases(urllib, (
82 84 "addclosehook",
83 85 "addinfourl",
84 86 "ftpwrapper",
85 87 "pathname2url",
86 88 "quote",
87 89 "splitattr",
88 90 "splitpasswd",
89 91 "splitport",
90 92 "splituser",
91 93 "unquote",
92 94 "url2pathname",
93 95 "urlencode",
94 96 ))
95 97 urlreq._registeraliases(urllib2, (
96 98 "AbstractHTTPHandler",
97 99 "BaseHandler",
98 100 "build_opener",
99 101 "FileHandler",
100 102 "FTPHandler",
101 103 "HTTPBasicAuthHandler",
102 104 "HTTPDigestAuthHandler",
103 105 "HTTPHandler",
104 106 "HTTPPasswordMgrWithDefaultRealm",
105 107 "HTTPSHandler",
106 108 "install_opener",
107 109 "ProxyHandler",
108 110 "Request",
109 111 "urlopen",
110 112 ))
111 113 urlerr._registeraliases(urllib2, (
112 114 "HTTPError",
113 115 "URLError",
114 116 ))
115 117 httpserver._registeraliases(BaseHTTPServer, (
116 118 "HTTPServer",
117 119 "BaseHTTPRequestHandler",
118 120 ))
119 121 httpserver._registeraliases(SimpleHTTPServer, (
120 122 "SimpleHTTPRequestHandler",
121 123 ))
122 124 httpserver._registeraliases(CGIHTTPServer, (
123 125 "CGIHTTPRequestHandler",
124 126 ))
125 127
126 128 else:
127 129 import urllib.request
128 130 urlreq._registeraliases(urllib.request, (
129 131 "AbstractHTTPHandler",
130 132 "addclosehook",
131 133 "addinfourl",
132 134 "BaseHandler",
133 135 "build_opener",
134 136 "FileHandler",
135 137 "FTPHandler",
136 138 "ftpwrapper",
137 139 "HTTPHandler",
138 140 "HTTPSHandler",
139 141 "install_opener",
140 142 "pathname2url",
141 143 "HTTPBasicAuthHandler",
142 144 "HTTPDigestAuthHandler",
143 145 "HTTPPasswordMgrWithDefaultRealm",
144 146 "ProxyHandler",
145 147 "quote",
146 148 "Request",
147 149 "splitattr",
148 150 "splitpasswd",
149 151 "splitport",
150 152 "splituser",
151 153 "unquote",
152 154 "url2pathname",
153 155 "urlopen",
154 156 ))
155 157 import urllib.error
156 158 urlerr._registeraliases(urllib.error, (
157 159 "HTTPError",
158 160 "URLError",
159 161 ))
160 162 import http.server
161 163 httpserver._registeraliases(http.server, (
162 164 "HTTPServer",
163 165 "BaseHTTPRequestHandler",
164 166 "SimpleHTTPRequestHandler",
165 167 "CGIHTTPRequestHandler",
166 168 ))
@@ -1,2904 +1,2904 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import re as remod
28 28 import shutil
29 29 import signal
30 30 import socket
31 31 import subprocess
32 32 import sys
33 33 import tempfile
34 34 import textwrap
35 35 import time
36 36 import traceback
37 37 import zlib
38 38
39 39 from . import (
40 40 encoding,
41 41 error,
42 42 i18n,
43 43 osutil,
44 44 parsers,
45 45 pycompat,
46 46 )
47 47
48 48 for attr in (
49 49 'empty',
50 50 'httplib',
51 51 'httpserver',
52 52 'pickle',
53 53 'queue',
54 54 'urlerr',
55 55 'urlparse',
56 56 # we do import urlreq, but we do it outside the loop
57 57 #'urlreq',
58 58 'stringio',
59 59 'socketserver',
60 60 'xmlrpclib',
61 61 ):
62 62 globals()[attr] = getattr(pycompat, attr)
63 63
64 64 # This line is to make pyflakes happy:
65 65 urlreq = pycompat.urlreq
66 66
67 67 if os.name == 'nt':
68 68 from . import windows as platform
69 69 else:
70 70 from . import posix as platform
71 71
72 72 _ = i18n._
73 73
74 74 bindunixsocket = platform.bindunixsocket
75 75 cachestat = platform.cachestat
76 76 checkexec = platform.checkexec
77 77 checklink = platform.checklink
78 78 copymode = platform.copymode
79 79 executablepath = platform.executablepath
80 80 expandglobs = platform.expandglobs
81 81 explainexit = platform.explainexit
82 82 findexe = platform.findexe
83 83 gethgcmd = platform.gethgcmd
84 84 getuser = platform.getuser
85 85 getpid = os.getpid
86 86 groupmembers = platform.groupmembers
87 87 groupname = platform.groupname
88 88 hidewindow = platform.hidewindow
89 89 isexec = platform.isexec
90 90 isowner = platform.isowner
91 91 localpath = platform.localpath
92 92 lookupreg = platform.lookupreg
93 93 makedir = platform.makedir
94 94 nlinks = platform.nlinks
95 95 normpath = platform.normpath
96 96 normcase = platform.normcase
97 97 normcasespec = platform.normcasespec
98 98 normcasefallback = platform.normcasefallback
99 99 openhardlinks = platform.openhardlinks
100 100 oslink = platform.oslink
101 101 parsepatchoutput = platform.parsepatchoutput
102 102 pconvert = platform.pconvert
103 103 poll = platform.poll
104 104 popen = platform.popen
105 105 posixfile = platform.posixfile
106 106 quotecommand = platform.quotecommand
107 107 readpipe = platform.readpipe
108 108 rename = platform.rename
109 109 removedirs = platform.removedirs
110 110 samedevice = platform.samedevice
111 111 samefile = platform.samefile
112 112 samestat = platform.samestat
113 113 setbinary = platform.setbinary
114 114 setflags = platform.setflags
115 115 setsignalhandler = platform.setsignalhandler
116 116 shellquote = platform.shellquote
117 117 spawndetached = platform.spawndetached
118 118 split = platform.split
119 119 sshargs = platform.sshargs
120 120 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
121 121 statisexec = platform.statisexec
122 122 statislink = platform.statislink
123 123 termwidth = platform.termwidth
124 124 testpid = platform.testpid
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 unlinkpath = platform.unlinkpath
128 128 username = platform.username
129 129
130 130 # Python compatibility
131 131
132 132 _notset = object()
133 133
134 134 # disable Python's problematic floating point timestamps (issue4836)
135 135 # (Python hypocritically says you shouldn't change this behavior in
136 136 # libraries, and sure enough Mercurial is not a library.)
137 137 os.stat_float_times(False)
138 138
139 139 def safehasattr(thing, attr):
140 140 return getattr(thing, attr, _notset) is not _notset
141 141
142 142 DIGESTS = {
143 143 'md5': hashlib.md5,
144 144 'sha1': hashlib.sha1,
145 145 'sha512': hashlib.sha512,
146 146 }
147 147 # List of digest types from strongest to weakest
148 148 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
149 149
150 150 for k in DIGESTS_BY_STRENGTH:
151 151 assert k in DIGESTS
152 152
153 153 class digester(object):
154 154 """helper to compute digests.
155 155
156 156 This helper can be used to compute one or more digests given their name.
157 157
158 158 >>> d = digester(['md5', 'sha1'])
159 159 >>> d.update('foo')
160 160 >>> [k for k in sorted(d)]
161 161 ['md5', 'sha1']
162 162 >>> d['md5']
163 163 'acbd18db4cc2f85cedef654fccc4a4d8'
164 164 >>> d['sha1']
165 165 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
166 166 >>> digester.preferred(['md5', 'sha1'])
167 167 'sha1'
168 168 """
169 169
170 170 def __init__(self, digests, s=''):
171 171 self._hashes = {}
172 172 for k in digests:
173 173 if k not in DIGESTS:
174 174 raise Abort(_('unknown digest type: %s') % k)
175 175 self._hashes[k] = DIGESTS[k]()
176 176 if s:
177 177 self.update(s)
178 178
179 179 def update(self, data):
180 180 for h in self._hashes.values():
181 181 h.update(data)
182 182
183 183 def __getitem__(self, key):
184 184 if key not in DIGESTS:
185 185 raise Abort(_('unknown digest type: %s') % key)
186 186 return self._hashes[key].hexdigest()
187 187
188 188 def __iter__(self):
189 189 return iter(self._hashes)
190 190
191 191 @staticmethod
192 192 def preferred(supported):
193 193 """returns the strongest digest type in both supported and DIGESTS."""
194 194
195 195 for k in DIGESTS_BY_STRENGTH:
196 196 if k in supported:
197 197 return k
198 198 return None
199 199
200 200 class digestchecker(object):
201 201 """file handle wrapper that additionally checks content against a given
202 202 size and digests.
203 203
204 204 d = digestchecker(fh, size, {'md5': '...'})
205 205
206 206 When multiple digests are given, all of them are validated.
207 207 """
208 208
209 209 def __init__(self, fh, size, digests):
210 210 self._fh = fh
211 211 self._size = size
212 212 self._got = 0
213 213 self._digests = dict(digests)
214 214 self._digester = digester(self._digests.keys())
215 215
216 216 def read(self, length=-1):
217 217 content = self._fh.read(length)
218 218 self._digester.update(content)
219 219 self._got += len(content)
220 220 return content
221 221
222 222 def validate(self):
223 223 if self._size != self._got:
224 224 raise Abort(_('size mismatch: expected %d, got %d') %
225 225 (self._size, self._got))
226 226 for k, v in self._digests.items():
227 227 if v != self._digester[k]:
228 228 # i18n: first parameter is a digest name
229 229 raise Abort(_('%s mismatch: expected %s, got %s') %
230 230 (k, v, self._digester[k]))
231 231
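# Illustrative sketch (not part of the changeset), with hypothetical data:
# validate() passes only if the declared size and every declared digest
# match what was actually read through the wrapper.
import hashlib
import io

data = 'some payload'      # a Python 2 byte string
wrapped = digestchecker(io.BytesIO(data), len(data),
                        {'sha1': hashlib.sha1(data).hexdigest()})
wrapped.read()      # feeds the internal digester and the byte counter
wrapped.validate()  # would raise Abort on a size or digest mismatch
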
232 232 try:
233 233 buffer = buffer
234 234 except NameError:
235 if sys.version_info[0] < 3:
235 if not pycompat.ispy3:
236 236 def buffer(sliceable, offset=0):
237 237 return sliceable[offset:]
238 238 else:
239 239 def buffer(sliceable, offset=0):
240 240 return memoryview(sliceable)[offset:]
241 241
242 242 closefds = os.name == 'posix'
243 243
244 244 _chunksize = 4096
245 245
246 246 class bufferedinputpipe(object):
247 247 """a manually buffered input pipe
248 248
249 249 Python will not let us use buffered IO and lazy reading with 'polling' at
250 250 the same time. We cannot probe the buffer state and select will not detect
251 251 that data are ready to read if they are already buffered.
252 252
253 253 This class let us work around that by implementing its own buffering
254 254 (allowing efficient readline) while offering a way to know if the buffer is
255 255 empty from the output (allowing collaboration of the buffer with polling).
256 256
257 257 This class lives in the 'util' module because it makes use of the 'os'
258 258 module from the python stdlib.
259 259 """
260 260
261 261 def __init__(self, input):
262 262 self._input = input
263 263 self._buffer = []
264 264 self._eof = False
265 265 self._lenbuf = 0
266 266
267 267 @property
268 268 def hasbuffer(self):
269 269 """True is any data is currently buffered
270 270
271 271 This will be used externally as a pre-step for polling IO. If there is
272 272 already data then no polling should be set in place."""
273 273 return bool(self._buffer)
274 274
275 275 @property
276 276 def closed(self):
277 277 return self._input.closed
278 278
279 279 def fileno(self):
280 280 return self._input.fileno()
281 281
282 282 def close(self):
283 283 return self._input.close()
284 284
285 285 def read(self, size):
286 286 while (not self._eof) and (self._lenbuf < size):
287 287 self._fillbuffer()
288 288 return self._frombuffer(size)
289 289
290 290 def readline(self, *args, **kwargs):
291 291 if 1 < len(self._buffer):
292 292 # this should not happen because both read and readline end with a
293 293 # _frombuffer call that collapse it.
294 294 self._buffer = [''.join(self._buffer)]
295 295 self._lenbuf = len(self._buffer[0])
296 296 lfi = -1
297 297 if self._buffer:
298 298 lfi = self._buffer[-1].find('\n')
299 299 while (not self._eof) and lfi < 0:
300 300 self._fillbuffer()
301 301 if self._buffer:
302 302 lfi = self._buffer[-1].find('\n')
303 303 size = lfi + 1
304 304 if lfi < 0: # end of file
305 305 size = self._lenbuf
306 306 elif 1 < len(self._buffer):
307 307 # we need to take previous chunks into account
308 308 size += self._lenbuf - len(self._buffer[-1])
309 309 return self._frombuffer(size)
310 310
311 311 def _frombuffer(self, size):
312 312 """return at most 'size' data from the buffer
313 313
314 314 The data are removed from the buffer."""
315 315 if size == 0 or not self._buffer:
316 316 return ''
317 317 buf = self._buffer[0]
318 318 if 1 < len(self._buffer):
319 319 buf = ''.join(self._buffer)
320 320
321 321 data = buf[:size]
322 322 buf = buf[len(data):]
323 323 if buf:
324 324 self._buffer = [buf]
325 325 self._lenbuf = len(buf)
326 326 else:
327 327 self._buffer = []
328 328 self._lenbuf = 0
329 329 return data
330 330
331 331 def _fillbuffer(self):
332 332 """read data to the buffer"""
333 333 data = os.read(self._input.fileno(), _chunksize)
334 334 if not data:
335 335 self._eof = True
336 336 else:
337 337 self._lenbuf += len(data)
338 338 self._buffer.append(data)
339 339
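# Illustrative sketch (not part of the changeset) of the intended calling
# pattern: bytes already sitting in the Python-level buffer are invisible
# to select/poll, so a caller must consult hasbuffer before blocking on
# the underlying file descriptor.
import select

def nextline(pipe):
    """read one line from a hypothetical bufferedinputpipe"""
    if not pipe.hasbuffer:
        # nothing buffered: safe to block in the OS-level poll
        select.select([pipe.fileno()], [], [])
    return pipe.readline()
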
340 340 def popen2(cmd, env=None, newlines=False):
341 341 # Setting bufsize to -1 lets the system decide the buffer size.
342 342 # The default for bufsize is 0, meaning unbuffered. This leads to
343 343 # poor performance on Mac OS X: http://bugs.python.org/issue4194
344 344 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
345 345 close_fds=closefds,
346 346 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
347 347 universal_newlines=newlines,
348 348 env=env)
349 349 return p.stdin, p.stdout
350 350
351 351 def popen3(cmd, env=None, newlines=False):
352 352 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
353 353 return stdin, stdout, stderr
354 354
355 355 def popen4(cmd, env=None, newlines=False, bufsize=-1):
356 356 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
357 357 close_fds=closefds,
358 358 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
359 359 stderr=subprocess.PIPE,
360 360 universal_newlines=newlines,
361 361 env=env)
362 362 return p.stdin, p.stdout, p.stderr, p
363 363
364 364 def version():
365 365 """Return version information if available."""
366 366 try:
367 367 from . import __version__
368 368 return __version__.version
369 369 except ImportError:
370 370 return 'unknown'
371 371
372 372 def versiontuple(v=None, n=4):
373 373 """Parses a Mercurial version string into an N-tuple.
374 374
375 375 The version string to be parsed is specified with the ``v`` argument.
376 376 If it isn't defined, the current Mercurial version string will be parsed.
377 377
378 378 ``n`` can be 2, 3, or 4. Here is how some version strings map to
379 379 returned values:
380 380
381 381 >>> v = '3.6.1+190-df9b73d2d444'
382 382 >>> versiontuple(v, 2)
383 383 (3, 6)
384 384 >>> versiontuple(v, 3)
385 385 (3, 6, 1)
386 386 >>> versiontuple(v, 4)
387 387 (3, 6, 1, '190-df9b73d2d444')
388 388
389 389 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
390 390 (3, 6, 1, '190-df9b73d2d444+20151118')
391 391
392 392 >>> v = '3.6'
393 393 >>> versiontuple(v, 2)
394 394 (3, 6)
395 395 >>> versiontuple(v, 3)
396 396 (3, 6, None)
397 397 >>> versiontuple(v, 4)
398 398 (3, 6, None, None)
399 399
400 400 >>> v = '3.9-rc'
401 401 >>> versiontuple(v, 2)
402 402 (3, 9)
403 403 >>> versiontuple(v, 3)
404 404 (3, 9, None)
405 405 >>> versiontuple(v, 4)
406 406 (3, 9, None, 'rc')
407 407
408 408 >>> v = '3.9-rc+2-02a8fea4289b'
409 409 >>> versiontuple(v, 2)
410 410 (3, 9)
411 411 >>> versiontuple(v, 3)
412 412 (3, 9, None)
413 413 >>> versiontuple(v, 4)
414 414 (3, 9, None, 'rc+2-02a8fea4289b')
415 415 """
416 416 if not v:
417 417 v = version()
418 418 parts = remod.split('[\+-]', v, 1)
419 419 if len(parts) == 1:
420 420 vparts, extra = parts[0], None
421 421 else:
422 422 vparts, extra = parts
423 423
424 424 vints = []
425 425 for i in vparts.split('.'):
426 426 try:
427 427 vints.append(int(i))
428 428 except ValueError:
429 429 break
430 430 # (3, 6) -> (3, 6, None)
431 431 while len(vints) < 3:
432 432 vints.append(None)
433 433
434 434 if n == 2:
435 435 return (vints[0], vints[1])
436 436 if n == 3:
437 437 return (vints[0], vints[1], vints[2])
438 438 if n == 4:
439 439 return (vints[0], vints[1], vints[2], extra)
440 440
441 441 # used by parsedate
442 442 defaultdateformats = (
443 443 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
444 444 '%Y-%m-%dT%H:%M', # without seconds
445 445 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
446 446 '%Y-%m-%dT%H%M', # without seconds
447 447 '%Y-%m-%d %H:%M:%S', # our common legal variant
448 448 '%Y-%m-%d %H:%M', # without seconds
449 449 '%Y-%m-%d %H%M%S', # without :
450 450 '%Y-%m-%d %H%M', # without seconds
451 451 '%Y-%m-%d %I:%M:%S%p',
452 452 '%Y-%m-%d %H:%M',
453 453 '%Y-%m-%d %I:%M%p',
454 454 '%Y-%m-%d',
455 455 '%m-%d',
456 456 '%m/%d',
457 457 '%m/%d/%y',
458 458 '%m/%d/%Y',
459 459 '%a %b %d %H:%M:%S %Y',
460 460 '%a %b %d %I:%M:%S%p %Y',
461 461 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
462 462 '%b %d %H:%M:%S %Y',
463 463 '%b %d %I:%M:%S%p %Y',
464 464 '%b %d %H:%M:%S',
465 465 '%b %d %I:%M:%S%p',
466 466 '%b %d %H:%M',
467 467 '%b %d %I:%M%p',
468 468 '%b %d %Y',
469 469 '%b %d',
470 470 '%H:%M:%S',
471 471 '%I:%M:%S%p',
472 472 '%H:%M',
473 473 '%I:%M%p',
474 474 )
475 475
476 476 extendeddateformats = defaultdateformats + (
477 477 "%Y",
478 478 "%Y-%m",
479 479 "%b",
480 480 "%b %Y",
481 481 )
482 482
483 483 def cachefunc(func):
484 484 '''cache the result of function calls'''
485 485 # XXX doesn't handle keywords args
486 486 if func.__code__.co_argcount == 0:
487 487 cache = []
488 488 def f():
489 489 if len(cache) == 0:
490 490 cache.append(func())
491 491 return cache[0]
492 492 return f
493 493 cache = {}
494 494 if func.__code__.co_argcount == 1:
495 495 # we gain a small amount of time because
496 496 # we don't need to pack/unpack the list
497 497 def f(arg):
498 498 if arg not in cache:
499 499 cache[arg] = func(arg)
500 500 return cache[arg]
501 501 else:
502 502 def f(*args):
503 503 if args not in cache:
504 504 cache[args] = func(*args)
505 505 return cache[args]
506 506
507 507 return f
508 508
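# Illustrative sketch (not part of the changeset): cachefunc memoizes on
# the positional arguments, so a pure function body runs once per input.
calls = []
def square(x):
    calls.append(x)
    return x * x
square = cachefunc(square)
assert square(3) == 9 and square(3) == 9
assert calls == [3]    # the second call was served from the cache
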
509 509 class sortdict(dict):
510 510 '''a simple sorted dictionary'''
511 511 def __init__(self, data=None):
512 512 self._list = []
513 513 if data:
514 514 self.update(data)
515 515 def copy(self):
516 516 return sortdict(self)
517 517 def __setitem__(self, key, val):
518 518 if key in self:
519 519 self._list.remove(key)
520 520 self._list.append(key)
521 521 dict.__setitem__(self, key, val)
522 522 def __iter__(self):
523 523 return self._list.__iter__()
524 524 def update(self, src):
525 525 if isinstance(src, dict):
526 526 src = src.iteritems()
527 527 for k, v in src:
528 528 self[k] = v
529 529 def clear(self):
530 530 dict.clear(self)
531 531 self._list = []
532 532 def items(self):
533 533 return [(k, self[k]) for k in self._list]
534 534 def __delitem__(self, key):
535 535 dict.__delitem__(self, key)
536 536 self._list.remove(key)
537 537 def pop(self, key, *args, **kwargs):
538 538 dict.pop(self, key, *args, **kwargs)
539 539 try:
540 540 self._list.remove(key)
541 541 except ValueError:
542 542 pass
543 543 def keys(self):
544 544 return self._list
545 545 def iterkeys(self):
546 546 return self._list.__iter__()
547 547 def iteritems(self):
548 548 for k in self._list:
549 549 yield k, self[k]
550 550 def insert(self, index, key, val):
551 551 self._list.insert(index, key)
552 552 dict.__setitem__(self, key, val)
553 553 def __repr__(self):
554 554 if not self:
555 555 return '%s()' % self.__class__.__name__
556 556 return '%s(%r)' % (self.__class__.__name__, self.items())
557 557
558 558 class _lrucachenode(object):
559 559 """A node in a doubly linked list.
560 560
561 561 Holds a reference to nodes on either side as well as a key-value
562 562 pair for the dictionary entry.
563 563 """
564 564 __slots__ = ('next', 'prev', 'key', 'value')
565 565
566 566 def __init__(self):
567 567 self.next = None
568 568 self.prev = None
569 569
570 570 self.key = _notset
571 571 self.value = None
572 572
573 573 def markempty(self):
574 574 """Mark the node as emptied."""
575 575 self.key = _notset
576 576
577 577 class lrucachedict(object):
578 578 """Dict that caches most recent accesses and sets.
579 579
580 580 The dict consists of an actual backing dict - indexed by original
581 581 key - and a doubly linked circular list defining the order of entries in
582 582 the cache.
583 583
584 584 The head node is the newest entry in the cache. If the cache is full,
585 585 we recycle head.prev and make it the new head. Cache accesses result in
586 586 the node being moved to before the existing head and being marked as the
587 587 new head node.
588 588 """
589 589 def __init__(self, max):
590 590 self._cache = {}
591 591
592 592 self._head = head = _lrucachenode()
593 593 head.prev = head
594 594 head.next = head
595 595 self._size = 1
596 596 self._capacity = max
597 597
598 598 def __len__(self):
599 599 return len(self._cache)
600 600
601 601 def __contains__(self, k):
602 602 return k in self._cache
603 603
604 604 def __iter__(self):
605 605 # We don't have to iterate in cache order, but why not.
606 606 n = self._head
607 607 for i in range(len(self._cache)):
608 608 yield n.key
609 609 n = n.next
610 610
611 611 def __getitem__(self, k):
612 612 node = self._cache[k]
613 613 self._movetohead(node)
614 614 return node.value
615 615
616 616 def __setitem__(self, k, v):
617 617 node = self._cache.get(k)
618 618 # Replace existing value and mark as newest.
619 619 if node is not None:
620 620 node.value = v
621 621 self._movetohead(node)
622 622 return
623 623
624 624 if self._size < self._capacity:
625 625 node = self._addcapacity()
626 626 else:
627 627 # Grab the last/oldest item.
628 628 node = self._head.prev
629 629
630 630 # At capacity. Kill the old entry.
631 631 if node.key is not _notset:
632 632 del self._cache[node.key]
633 633
634 634 node.key = k
635 635 node.value = v
636 636 self._cache[k] = node
637 637 # And mark it as newest entry. No need to adjust order since it
638 638 # is already self._head.prev.
639 639 self._head = node
640 640
641 641 def __delitem__(self, k):
642 642 node = self._cache.pop(k)
643 643 node.markempty()
644 644
645 645 # Temporarily mark as newest item before re-adjusting head to make
646 646 # this node the oldest item.
647 647 self._movetohead(node)
648 648 self._head = node.next
649 649
650 650 # Additional dict methods.
651 651
652 652 def get(self, k, default=None):
653 653 try:
654 654 return self._cache[k].value
655 655 except KeyError:
656 656 return default
657 657
658 658 def clear(self):
659 659 n = self._head
660 660 while n.key is not _notset:
661 661 n.markempty()
662 662 n = n.next
663 663
664 664 self._cache.clear()
665 665
666 666 def copy(self):
667 667 result = lrucachedict(self._capacity)
668 668 n = self._head.prev
669 669 # Iterate in oldest-to-newest order, so the copy has the right ordering
670 670 for i in range(len(self._cache)):
671 671 result[n.key] = n.value
672 672 n = n.prev
673 673 return result
674 674
675 675 def _movetohead(self, node):
676 676 """Mark a node as the newest, making it the new head.
677 677
678 678 When a node is accessed, it becomes the freshest entry in the LRU
679 679 list, which is denoted by self._head.
680 680
681 681 Visually, let's make ``N`` the new head node (* denotes head):
682 682
683 683 previous/oldest <-> head <-> next/next newest
684 684
685 685 ----<->--- A* ---<->-----
686 686 | |
687 687 E <-> D <-> N <-> C <-> B
688 688
689 689 To:
690 690
691 691 ----<->--- N* ---<->-----
692 692 | |
693 693 E <-> D <-> C <-> B <-> A
694 694
695 695 This requires the following moves:
696 696
697 697 C.next = D (node.prev.next = node.next)
698 698 D.prev = C (node.next.prev = node.prev)
699 699 E.next = N (head.prev.next = node)
700 700 N.prev = E (node.prev = head.prev)
701 701 N.next = A (node.next = head)
702 702 A.prev = N (head.prev = node)
703 703 """
704 704 head = self._head
705 705 # C.next = D
706 706 node.prev.next = node.next
707 707 # D.prev = C
708 708 node.next.prev = node.prev
709 709 # N.prev = E
710 710 node.prev = head.prev
711 711 # N.next = A
712 712 # It is tempting to do just "head" here, however if node is
713 713 # adjacent to head, this will do bad things.
714 714 node.next = head.prev.next
715 715 # E.next = N
716 716 node.next.prev = node
717 717 # A.prev = N
718 718 node.prev.next = node
719 719
720 720 self._head = node
721 721
722 722 def _addcapacity(self):
723 723 """Add a node to the circular linked list.
724 724
725 725 The new node is inserted before the head node.
726 726 """
727 727 head = self._head
728 728 node = _lrucachenode()
729 729 head.prev.next = node
730 730 node.prev = head.prev
731 731 node.next = head
732 732 head.prev = node
733 733 self._size += 1
734 734 return node
735 735
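# Illustrative sketch (not part of the changeset): with a capacity of two,
# inserting a third key recycles the oldest node, and a read refreshes an
# entry so that something else becomes the eviction candidate.
d = lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']          # touch 'a'; 'b' is now the least recently used entry
d['c'] = 3      # recycles the tail node, evicting 'b'
assert 'b' not in d and 'a' in d and 'c' in d
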
736 736 def lrucachefunc(func):
737 737 '''cache most recent results of function calls'''
738 738 cache = {}
739 739 order = collections.deque()
740 740 if func.__code__.co_argcount == 1:
741 741 def f(arg):
742 742 if arg not in cache:
743 743 if len(cache) > 20:
744 744 del cache[order.popleft()]
745 745 cache[arg] = func(arg)
746 746 else:
747 747 order.remove(arg)
748 748 order.append(arg)
749 749 return cache[arg]
750 750 else:
751 751 def f(*args):
752 752 if args not in cache:
753 753 if len(cache) > 20:
754 754 del cache[order.popleft()]
755 755 cache[args] = func(*args)
756 756 else:
757 757 order.remove(args)
758 758 order.append(args)
759 759 return cache[args]
760 760
761 761 return f
762 762
763 763 class propertycache(object):
764 764 def __init__(self, func):
765 765 self.func = func
766 766 self.name = func.__name__
767 767 def __get__(self, obj, type=None):
768 768 result = self.func(obj)
769 769 self.cachevalue(obj, result)
770 770 return result
771 771
772 772 def cachevalue(self, obj, value):
773 773 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
774 774 obj.__dict__[self.name] = value
775 775
776 776 def pipefilter(s, cmd):
777 777 '''filter string S through command CMD, returning its output'''
778 778 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
779 779 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
780 780 pout, perr = p.communicate(s)
781 781 return pout
782 782
783 783 def tempfilter(s, cmd):
784 784 '''filter string S through a pair of temporary files with CMD.
785 785 CMD is used as a template to create the real command to be run,
786 786 with the strings INFILE and OUTFILE replaced by the real names of
787 787 the temporary files generated.'''
788 788 inname, outname = None, None
789 789 try:
790 790 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
791 791 fp = os.fdopen(infd, 'wb')
792 792 fp.write(s)
793 793 fp.close()
794 794 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
795 795 os.close(outfd)
796 796 cmd = cmd.replace('INFILE', inname)
797 797 cmd = cmd.replace('OUTFILE', outname)
798 798 code = os.system(cmd)
799 799 if sys.platform == 'OpenVMS' and code & 1:
800 800 code = 0
801 801 if code:
802 802 raise Abort(_("command '%s' failed: %s") %
803 803 (cmd, explainexit(code)))
804 804 return readfile(outname)
805 805 finally:
806 806 try:
807 807 if inname:
808 808 os.unlink(inname)
809 809 except OSError:
810 810 pass
811 811 try:
812 812 if outname:
813 813 os.unlink(outname)
814 814 except OSError:
815 815 pass
816 816
817 817 filtertable = {
818 818 'tempfile:': tempfilter,
819 819 'pipe:': pipefilter,
820 820 }
821 821
822 822 def filter(s, cmd):
823 823 "filter a string through a command that transforms its input to its output"
824 824 for name, fn in filtertable.iteritems():
825 825 if cmd.startswith(name):
826 826 return fn(s, cmd[len(name):].lstrip())
827 827 return pipefilter(s, cmd)
828 828
829 829 def binary(s):
830 830 """return true if a string is binary data"""
831 831 return bool(s and '\0' in s)
832 832
833 833 def increasingchunks(source, min=1024, max=65536):
834 834 '''return no less than min bytes per chunk while data remains,
835 835 doubling min after each chunk until it reaches max'''
836 836 def log2(x):
837 837 if not x:
838 838 return 0
839 839 i = 0
840 840 while x:
841 841 x >>= 1
842 842 i += 1
843 843 return i - 1
844 844
845 845 buf = []
846 846 blen = 0
847 847 for chunk in source:
848 848 buf.append(chunk)
849 849 blen += len(chunk)
850 850 if blen >= min:
851 851 if min < max:
852 852 min = min << 1
853 853 nmin = 1 << log2(blen)
854 854 if nmin > min:
855 855 min = nmin
856 856 if min > max:
857 857 min = max
858 858 yield ''.join(buf)
859 859 blen = 0
860 860 buf = []
861 861 if buf:
862 862 yield ''.join(buf)
863 863
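# A minimal illustrative sketch (not from the original source): feeding
# increasingchunks() many tiny chunks yields progressively larger buffers,
# which keeps per-chunk overhead low when streaming:
#
#   >>> [len(c) for c in increasingchunks(['x' * 100] * 10, min=200, max=800)]
#   [200, 400, 400]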
864 864 Abort = error.Abort
865 865
866 866 def always(fn):
867 867 return True
868 868
869 869 def never(fn):
870 870 return False
871 871
872 872 def nogc(func):
873 873 """disable garbage collector
874 874
875 875 Python's garbage collector triggers a GC each time a certain number of
876 876 container objects (the number being defined by gc.get_threshold()) are
877 877 allocated even when marked not to be tracked by the collector. Tracking has
878 878 no effect on when GCs are triggered, only on what objects the GC looks
879 879 into. As a workaround, disable GC while building complex (huge)
880 880 containers.
881 881
882 882 This garbage collector issue has been fixed in Python 2.7.
883 883 """
884 884 if sys.version_info >= (2, 7):
885 885 return func
886 886 def wrapper(*args, **kwargs):
887 887 gcenabled = gc.isenabled()
888 888 gc.disable()
889 889 try:
890 890 return func(*args, **kwargs)
891 891 finally:
892 892 if gcenabled:
893 893 gc.enable()
894 894 return wrapper
895 895
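# A minimal illustrative sketch (not from the original source): decorating a
# builder of a huge container with @nogc avoids repeated collector passes on
# pre-2.7 interpreters; on 2.7+ the function is returned unchanged:
#
#   >>> @nogc
#   ... def buildmap(n):
#   ...     return dict((i, str(i)) for i in xrange(n))
#   >>> len(buildmap(5))
#   5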
896 896 def pathto(root, n1, n2):
897 897 '''return the relative path from one place to another.
898 898 root should use os.sep to separate directories
899 899 n1 should use os.sep to separate directories
900 900 n2 should use "/" to separate directories
901 901 returns an os.sep-separated path.
902 902
903 903 If n1 is a relative path, it's assumed it's
904 904 relative to root.
905 905 n2 should always be relative to root.
906 906 '''
907 907 if not n1:
908 908 return localpath(n2)
909 909 if os.path.isabs(n1):
910 910 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
911 911 return os.path.join(root, localpath(n2))
912 912 n2 = '/'.join((pconvert(root), n2))
913 913 a, b = splitpath(n1), n2.split('/')
914 914 a.reverse()
915 915 b.reverse()
916 916 while a and b and a[-1] == b[-1]:
917 917 a.pop()
918 918 b.pop()
919 919 b.reverse()
920 920 return os.sep.join((['..'] * len(a)) + b) or '.'
921 921
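# A minimal illustrative sketch (not from the original source): on a POSIX
# host (os.sep == '/'), pathto() climbs out of n1 up to the common prefix
# with n2, then descends ('/repo' is a hypothetical root):
#
#   >>> pathto('/repo', 'a/b/c.txt', 'a/d/e.txt')
#   '../../d/e.txt'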
922 922 def mainfrozen():
923 923 """return True if we are a frozen executable.
924 924
925 925 The code supports py2exe (most common, Windows only) and tools/freeze
926 926 (portable, not much used).
927 927 """
928 928 return (safehasattr(sys, "frozen") or # new py2exe
929 929 safehasattr(sys, "importers") or # old py2exe
930 930 imp.is_frozen("__main__")) # tools/freeze
931 931
932 932 # the location of data files matching the source code
933 933 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
934 934 # executable version (py2exe) doesn't support __file__
935 935 datapath = os.path.dirname(sys.executable)
936 936 else:
937 937 datapath = os.path.dirname(__file__)
938 938
939 939 i18n.setdatapath(datapath)
940 940
941 941 _hgexecutable = None
942 942
943 943 def hgexecutable():
944 944 """return location of the 'hg' executable.
945 945
946 946 Defaults to $HG or 'hg' in the search path.
947 947 """
948 948 if _hgexecutable is None:
949 949 hg = os.environ.get('HG')
950 950 mainmod = sys.modules['__main__']
951 951 if hg:
952 952 _sethgexecutable(hg)
953 953 elif mainfrozen():
954 954 if getattr(sys, 'frozen', None) == 'macosx_app':
955 955 # Env variable set by py2app
956 956 _sethgexecutable(os.environ['EXECUTABLEPATH'])
957 957 else:
958 958 _sethgexecutable(sys.executable)
959 959 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
960 960 _sethgexecutable(mainmod.__file__)
961 961 else:
962 962 exe = findexe('hg') or os.path.basename(sys.argv[0])
963 963 _sethgexecutable(exe)
964 964 return _hgexecutable
965 965
966 966 def _sethgexecutable(path):
967 967 """set location of the 'hg' executable"""
968 968 global _hgexecutable
969 969 _hgexecutable = path
970 970
971 971 def _isstdout(f):
972 972 fileno = getattr(f, 'fileno', None)
973 973 return fileno and fileno() == sys.__stdout__.fileno()
974 974
975 975 def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
976 976 '''enhanced shell command execution.
977 977 run with environment maybe modified, maybe in different dir.
978 978
979 979 if command fails and onerr is None, return status, else raise onerr
980 980 object as exception.
981 981
982 982 if out is specified, it is assumed to be a file-like object that has a
983 983 write() method. stdout and stderr will be redirected to out.'''
984 984 if environ is None:
985 985 environ = {}
986 986 try:
987 987 sys.stdout.flush()
988 988 except Exception:
989 989 pass
990 990 def py2shell(val):
991 991 'convert python object into string that is useful to shell'
992 992 if val is None or val is False:
993 993 return '0'
994 994 if val is True:
995 995 return '1'
996 996 return str(val)
997 997 origcmd = cmd
998 998 cmd = quotecommand(cmd)
999 999 if sys.platform == 'plan9' and (sys.version_info[0] == 2
1000 1000 and sys.version_info[1] < 7):
1001 1001 # subprocess kludge to work around issues in half-baked Python
1002 1002 # ports, notably bichued/python:
1003 1003 if cwd is not None:
1004 1004 os.chdir(cwd)
1005 1005 rc = os.system(cmd)
1006 1006 else:
1007 1007 env = dict(os.environ)
1008 1008 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1009 1009 env['HG'] = hgexecutable()
1010 1010 if out is None or _isstdout(out):
1011 1011 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1012 1012 env=env, cwd=cwd)
1013 1013 else:
1014 1014 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1015 1015 env=env, cwd=cwd, stdout=subprocess.PIPE,
1016 1016 stderr=subprocess.STDOUT)
1017 1017 for line in iter(proc.stdout.readline, ''):
1018 1018 out.write(line)
1019 1019 proc.wait()
1020 1020 rc = proc.returncode
1021 1021 if sys.platform == 'OpenVMS' and rc & 1:
1022 1022 rc = 0
1023 1023 if rc and onerr:
1024 1024 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
1025 1025 explainexit(rc)[0])
1026 1026 if errprefix:
1027 1027 errmsg = '%s: %s' % (errprefix, errmsg)
1028 1028 raise onerr(errmsg)
1029 1029 return rc
1030 1030
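# A minimal illustrative sketch (not from the original source; the command
# and prefix are hypothetical): passing onerr turns a nonzero exit status
# into a raised exception instead of a returned code:
#
#   rc = system('make doc', environ={'LC_ALL': 'C'}, onerr=Abort,
#               errprefix='documentation build failed')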
1031 1031 def checksignature(func):
1032 1032 '''wrap a function with code to check for calling errors'''
1033 1033 def check(*args, **kwargs):
1034 1034 try:
1035 1035 return func(*args, **kwargs)
1036 1036 except TypeError:
1037 1037 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1038 1038 raise error.SignatureError
1039 1039 raise
1040 1040
1041 1041 return check
1042 1042
1043 1043 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1044 1044 '''copy a file, preserving mode and optionally other stat info like
1045 1045 atime/mtime
1046 1046
1047 1047 The checkambig argument is used with filestat, and is useful only
1048 1048 if the destination file is guarded by a lock (e.g. repo.lock or
1049 1049 repo.wlock).
1050 1050
1051 1051 copystat and checkambig should be exclusive.
1052 1052 '''
1053 1053 assert not (copystat and checkambig)
1054 1054 oldstat = None
1055 1055 if os.path.lexists(dest):
1056 1056 if checkambig:
1057 1057 oldstat = checkambig and filestat(dest)
1058 1058 unlink(dest)
1059 1059 # hardlinks are problematic on CIFS, quietly ignore this flag
1060 1060 # until we find a way to work around it cleanly (issue4546)
1061 1061 if False and hardlink:
1062 1062 try:
1063 1063 oslink(src, dest)
1064 1064 return
1065 1065 except (IOError, OSError):
1066 1066 pass # fall back to normal copy
1067 1067 if os.path.islink(src):
1068 1068 os.symlink(os.readlink(src), dest)
1069 1069 # copystat is ignored for symlinks, but in general preserving stat
1070 1070 # info isn't needed for them anyway
1071 1071 else:
1072 1072 try:
1073 1073 shutil.copyfile(src, dest)
1074 1074 if copystat:
1075 1075 # copystat also copies mode
1076 1076 shutil.copystat(src, dest)
1077 1077 else:
1078 1078 shutil.copymode(src, dest)
1079 1079 if oldstat and oldstat.stat:
1080 1080 newstat = filestat(dest)
1081 1081 if newstat.isambig(oldstat):
1082 1082 # stat of copied file is ambiguous to original one
1083 1083 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1084 1084 os.utime(dest, (advanced, advanced))
1085 1085 except shutil.Error as inst:
1086 1086 raise Abort(str(inst))
1087 1087
1088 1088 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1089 1089 """Copy a directory tree using hardlinks if possible."""
1090 1090 num = 0
1091 1091
1092 1092 if hardlink is None:
1093 1093 hardlink = (os.stat(src).st_dev ==
1094 1094 os.stat(os.path.dirname(dst)).st_dev)
1095 1095 if hardlink:
1096 1096 topic = _('linking')
1097 1097 else:
1098 1098 topic = _('copying')
1099 1099
1100 1100 if os.path.isdir(src):
1101 1101 os.mkdir(dst)
1102 1102 for name, kind in osutil.listdir(src):
1103 1103 srcname = os.path.join(src, name)
1104 1104 dstname = os.path.join(dst, name)
1105 1105 def nprog(t, pos):
1106 1106 if pos is not None:
1107 1107 return progress(t, pos + num)
1108 1108 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1109 1109 num += n
1110 1110 else:
1111 1111 if hardlink:
1112 1112 try:
1113 1113 oslink(src, dst)
1114 1114 except (IOError, OSError):
1115 1115 hardlink = False
1116 1116 shutil.copy(src, dst)
1117 1117 else:
1118 1118 shutil.copy(src, dst)
1119 1119 num += 1
1120 1120 progress(topic, num)
1121 1121 progress(topic, None)
1122 1122
1123 1123 return hardlink, num
1124 1124
1125 1125 _winreservednames = '''con prn aux nul
1126 1126 com1 com2 com3 com4 com5 com6 com7 com8 com9
1127 1127 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1128 1128 _winreservedchars = ':*?"<>|'
1129 1129 def checkwinfilename(path):
1130 1130 r'''Check that the base-relative path is a valid filename on Windows.
1131 1131 Returns None if the path is ok, or a UI string describing the problem.
1132 1132
1133 1133 >>> checkwinfilename("just/a/normal/path")
1134 1134 >>> checkwinfilename("foo/bar/con.xml")
1135 1135 "filename contains 'con', which is reserved on Windows"
1136 1136 >>> checkwinfilename("foo/con.xml/bar")
1137 1137 "filename contains 'con', which is reserved on Windows"
1138 1138 >>> checkwinfilename("foo/bar/xml.con")
1139 1139 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1140 1140 "filename contains 'AUX', which is reserved on Windows"
1141 1141 >>> checkwinfilename("foo/bar/bla:.txt")
1142 1142 "filename contains ':', which is reserved on Windows"
1143 1143 >>> checkwinfilename("foo/bar/b\07la.txt")
1144 1144 "filename contains '\\x07', which is invalid on Windows"
1145 1145 >>> checkwinfilename("foo/bar/bla ")
1146 1146 "filename ends with ' ', which is not allowed on Windows"
1147 1147 >>> checkwinfilename("../bar")
1148 1148 >>> checkwinfilename("foo\\")
1149 1149 "filename ends with '\\', which is invalid on Windows"
1150 1150 >>> checkwinfilename("foo\\/bar")
1151 1151 "directory name ends with '\\', which is invalid on Windows"
1152 1152 '''
1153 1153 if path.endswith('\\'):
1154 1154 return _("filename ends with '\\', which is invalid on Windows")
1155 1155 if '\\/' in path:
1156 1156 return _("directory name ends with '\\', which is invalid on Windows")
1157 1157 for n in path.replace('\\', '/').split('/'):
1158 1158 if not n:
1159 1159 continue
1160 1160 for c in n:
1161 1161 if c in _winreservedchars:
1162 1162 return _("filename contains '%s', which is reserved "
1163 1163 "on Windows") % c
1164 1164 if ord(c) <= 31:
1165 1165 return _("filename contains %r, which is invalid "
1166 1166 "on Windows") % c
1167 1167 base = n.split('.')[0]
1168 1168 if base and base.lower() in _winreservednames:
1169 1169 return _("filename contains '%s', which is reserved "
1170 1170 "on Windows") % base
1171 1171 t = n[-1]
1172 1172 if t in '. ' and n not in '..':
1173 1173 return _("filename ends with '%s', which is not allowed "
1174 1174 "on Windows") % t
1175 1175
1176 1176 if os.name == 'nt':
1177 1177 checkosfilename = checkwinfilename
1178 1178 else:
1179 1179 checkosfilename = platform.checkosfilename
1180 1180
1181 1181 def makelock(info, pathname):
1182 1182 try:
1183 1183 return os.symlink(info, pathname)
1184 1184 except OSError as why:
1185 1185 if why.errno == errno.EEXIST:
1186 1186 raise
1187 1187 except AttributeError: # no symlink in os
1188 1188 pass
1189 1189
1190 1190 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1191 1191 os.write(ld, info)
1192 1192 os.close(ld)
1193 1193
1194 1194 def readlock(pathname):
1195 1195 try:
1196 1196 return os.readlink(pathname)
1197 1197 except OSError as why:
1198 1198 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1199 1199 raise
1200 1200 except AttributeError: # no symlink in os
1201 1201 pass
1202 1202 fp = posixfile(pathname)
1203 1203 r = fp.read()
1204 1204 fp.close()
1205 1205 return r
1206 1206
1207 1207 def fstat(fp):
1208 1208 '''stat file object that may not have fileno method.'''
1209 1209 try:
1210 1210 return os.fstat(fp.fileno())
1211 1211 except AttributeError:
1212 1212 return os.stat(fp.name)
1213 1213
1214 1214 # File system features
1215 1215
1216 1216 def fscasesensitive(path):
1217 1217 """
1218 1218 Return true if the given path is on a case-sensitive filesystem
1219 1219
1220 1220 Requires a path (like /foo/.hg) ending with a foldable final
1221 1221 directory component.
1222 1222 """
1223 1223 s1 = os.lstat(path)
1224 1224 d, b = os.path.split(path)
1225 1225 b2 = b.upper()
1226 1226 if b == b2:
1227 1227 b2 = b.lower()
1228 1228 if b == b2:
1229 1229 return True # no evidence against case sensitivity
1230 1230 p2 = os.path.join(d, b2)
1231 1231 try:
1232 1232 s2 = os.lstat(p2)
1233 1233 if s2 == s1:
1234 1234 return False
1235 1235 return True
1236 1236 except OSError:
1237 1237 return True
1238 1238
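# A minimal illustrative sketch (not from the original source; assumes a file
# '/tmp/Foo' exists): the final component is case-flipped and stat()ed; if
# both spellings resolve to identical stat data, the filesystem folds case:
#
#   >>> fscasesensitive('/tmp/Foo')   # compares lstat('/tmp/Foo') to
#   True                              # lstat('/tmp/FOO'); True on typical Linux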
1239 1239 try:
1240 1240 import re2
1241 1241 _re2 = None
1242 1242 except ImportError:
1243 1243 _re2 = False
1244 1244
1245 1245 class _re(object):
1246 1246 def _checkre2(self):
1247 1247 global _re2
1248 1248 try:
1249 1249 # check if match works, see issue3964
1250 1250 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1251 1251 except ImportError:
1252 1252 _re2 = False
1253 1253
1254 1254 def compile(self, pat, flags=0):
1255 1255 '''Compile a regular expression, using re2 if possible
1256 1256
1257 1257 For best performance, use only re2-compatible regexp features. The
1258 1258 only flags from the re module that are re2-compatible are
1259 1259 IGNORECASE and MULTILINE.'''
1260 1260 if _re2 is None:
1261 1261 self._checkre2()
1262 1262 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1263 1263 if flags & remod.IGNORECASE:
1264 1264 pat = '(?i)' + pat
1265 1265 if flags & remod.MULTILINE:
1266 1266 pat = '(?m)' + pat
1267 1267 try:
1268 1268 return re2.compile(pat)
1269 1269 except re2.error:
1270 1270 pass
1271 1271 return remod.compile(pat, flags)
1272 1272
1273 1273 @propertycache
1274 1274 def escape(self):
1275 1275 '''Return the version of escape corresponding to self.compile.
1276 1276
1277 1277 This is imperfect because whether re2 or re is used for a particular
1278 1278 function depends on the flags, etc, but it's the best we can do.
1279 1279 '''
1280 1280 global _re2
1281 1281 if _re2 is None:
1282 1282 self._checkre2()
1283 1283 if _re2:
1284 1284 return re2.escape
1285 1285 else:
1286 1286 return remod.escape
1287 1287
1288 1288 re = _re()
1289 1289
1290 1290 _fspathcache = {}
1291 1291 def fspath(name, root):
1292 1292 '''Get name in the case stored in the filesystem
1293 1293
1294 1294 The name should be relative to root, and be normcase-ed for efficiency.
1295 1295
1296 1296 Note that this function is unnecessary, and should not be
1297 1297 called, for case-sensitive filesystems (simply because it's expensive).
1298 1298
1299 1299 The root should be normcase-ed, too.
1300 1300 '''
1301 1301 def _makefspathcacheentry(dir):
1302 1302 return dict((normcase(n), n) for n in os.listdir(dir))
1303 1303
1304 1304 seps = os.sep
1305 1305 if os.altsep:
1306 1306 seps = seps + os.altsep
1307 1307 # Protect backslashes. This gets silly very quickly.
1308 1308 seps = seps.replace('\\','\\\\')
1309 1309 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1310 1310 dir = os.path.normpath(root)
1311 1311 result = []
1312 1312 for part, sep in pattern.findall(name):
1313 1313 if sep:
1314 1314 result.append(sep)
1315 1315 continue
1316 1316
1317 1317 if dir not in _fspathcache:
1318 1318 _fspathcache[dir] = _makefspathcacheentry(dir)
1319 1319 contents = _fspathcache[dir]
1320 1320
1321 1321 found = contents.get(part)
1322 1322 if not found:
1323 1323 # retry "once per directory" per "dirstate.walk" which
1324 1324 # may take place for each patch of "hg qpush", for example
1325 1325 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1326 1326 found = contents.get(part)
1327 1327
1328 1328 result.append(found or part)
1329 1329 dir = os.path.join(dir, part)
1330 1330
1331 1331 return ''.join(result)
1332 1332
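# A minimal illustrative sketch (not from the original source; assumes a
# case-insensitive filesystem where '/repo' contains 'README.txt'): fspath()
# restores the on-disk spelling of a normcase-ed name, one path component at
# a time, using a per-directory listing cache:
#
#   >>> fspath('readme.txt', '/repo')
#   'README.txt'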
1333 1333 def checknlink(testfile):
1334 1334 '''check whether hardlink count reporting works properly'''
1335 1335
1336 1336 # testfile may be open, so we need a separate file for checking to
1337 1337 # work around issue2543 (or testfile may get lost on Samba shares)
1338 1338 f1 = testfile + ".hgtmp1"
1339 1339 if os.path.lexists(f1):
1340 1340 return False
1341 1341 try:
1342 1342 posixfile(f1, 'w').close()
1343 1343 except IOError:
1344 1344 try:
1345 1345 os.unlink(f1)
1346 1346 except OSError:
1347 1347 pass
1348 1348 return False
1349 1349
1350 1350 f2 = testfile + ".hgtmp2"
1351 1351 fd = None
1352 1352 try:
1353 1353 oslink(f1, f2)
1354 1354 # nlinks() may behave differently for files on Windows shares if
1355 1355 # the file is open.
1356 1356 fd = posixfile(f2)
1357 1357 return nlinks(f2) > 1
1358 1358 except OSError:
1359 1359 return False
1360 1360 finally:
1361 1361 if fd is not None:
1362 1362 fd.close()
1363 1363 for f in (f1, f2):
1364 1364 try:
1365 1365 os.unlink(f)
1366 1366 except OSError:
1367 1367 pass
1368 1368
1369 1369 def endswithsep(path):
1370 1370 '''Check path ends with os.sep or os.altsep.'''
1371 1371 return path.endswith(os.sep) or os.altsep and path.endswith(os.altsep)
1372 1372
1373 1373 def splitpath(path):
1374 1374 '''Split path by os.sep.
1375 1375 Note that this function does not use os.altsep because it is
1376 1376 intended as a simple alternative to "xxx.split(os.sep)".
1377 1377 It is recommended to use os.path.normpath() before using this
1378 1378 function if needed.'''
1379 1379 return path.split(os.sep)
1380 1380
1381 1381 def gui():
1382 1382 '''Are we running in a GUI?'''
1383 1383 if sys.platform == 'darwin':
1384 1384 if 'SSH_CONNECTION' in os.environ:
1385 1385 # handle SSH access to a box where the user is logged in
1386 1386 return False
1387 1387 elif getattr(osutil, 'isgui', None):
1388 1388 # check if a CoreGraphics session is available
1389 1389 return osutil.isgui()
1390 1390 else:
1391 1391 # pure build; use a safe default
1392 1392 return True
1393 1393 else:
1394 1394 return os.name == "nt" or os.environ.get("DISPLAY")
1395 1395
1396 1396 def mktempcopy(name, emptyok=False, createmode=None):
1397 1397 """Create a temporary file with the same contents as the file 'name'
1398 1398
1399 1399 The permission bits are copied from the original file.
1400 1400
1401 1401 If the temporary file is going to be truncated immediately, you
1402 1402 can use emptyok=True as an optimization.
1403 1403
1404 1404 Returns the name of the temporary file.
1405 1405 """
1406 1406 d, fn = os.path.split(name)
1407 1407 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1408 1408 os.close(fd)
1409 1409 # Temporary files are created with mode 0600, which is usually not
1410 1410 # what we want. If the original file already exists, just copy
1411 1411 # its mode. Otherwise, manually obey umask.
1412 1412 copymode(name, temp, createmode)
1413 1413 if emptyok:
1414 1414 return temp
1415 1415 try:
1416 1416 try:
1417 1417 ifp = posixfile(name, "rb")
1418 1418 except IOError as inst:
1419 1419 if inst.errno == errno.ENOENT:
1420 1420 return temp
1421 1421 if not getattr(inst, 'filename', None):
1422 1422 inst.filename = name
1423 1423 raise
1424 1424 ofp = posixfile(temp, "wb")
1425 1425 for chunk in filechunkiter(ifp):
1426 1426 ofp.write(chunk)
1427 1427 ifp.close()
1428 1428 ofp.close()
1429 1429 except: # re-raises
1430 1430 try: os.unlink(temp)
1431 1431 except OSError: pass
1432 1432 raise
1433 1433 return temp
1434 1434
1435 1435 class filestat(object):
1436 1436 """help to exactly detect change of a file
1437 1437
1438 1438 The 'stat' attribute is the result of 'os.stat()' if the specified
1439 1439 'path' exists. Otherwise, it is None. This avoids a preparatory
1440 1440 'exists()' check on the caller's side.
1441 1441 """
1442 1442 def __init__(self, path):
1443 1443 try:
1444 1444 self.stat = os.stat(path)
1445 1445 except OSError as err:
1446 1446 if err.errno != errno.ENOENT:
1447 1447 raise
1448 1448 self.stat = None
1449 1449
1450 1450 __hash__ = object.__hash__
1451 1451
1452 1452 def __eq__(self, old):
1453 1453 try:
1454 1454 # if ambiguity between stat of new and old file is
1455 1455 # avoided, comparison of size, ctime and mtime is enough
1456 1456 # to exactly detect change of a file regardless of platform
1457 1457 return (self.stat.st_size == old.stat.st_size and
1458 1458 self.stat.st_ctime == old.stat.st_ctime and
1459 1459 self.stat.st_mtime == old.stat.st_mtime)
1460 1460 except AttributeError:
1461 1461 return False
1462 1462
1463 1463 def isambig(self, old):
1464 1464 """Examine whether new (= self) stat is ambiguous against old one
1465 1465
1466 1466 "S[N]" below means stat of a file at N-th change:
1467 1467
1468 1468 - S[n-1].ctime < S[n].ctime: can detect change of a file
1469 1469 - S[n-1].ctime == S[n].ctime
1470 1470 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1471 1471 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1472 1472 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1473 1473 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1474 1474
1475 1475 Case (*2) above means that a file was changed twice or more within
1476 1476 the same second (= S[n-1].ctime), so comparing timestamps alone
1477 1477 is ambiguous.
1478 1478
1479 1479 The basic idea for avoiding such ambiguity is to "advance mtime by
1480 1480 1 second, if the timestamp is ambiguous".
1481 1481
1482 1482 But advancing mtime only in case (*2) doesn't work as expected,
1483 1483 because a naturally advanced S[n].mtime in case (*1) might be
1484 1484 equal to a manually advanced S[n-1 or earlier].mtime.
1485 1485
1486 1486 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1487 1487 treated as ambiguous regardless of mtime, to avoid missing changes
1488 1488 hidden by colliding mtimes.
1489 1489
1490 1490 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1491 1491 S[n].mtime", even if size of a file isn't changed.
1492 1492 """
1493 1493 try:
1494 1494 return (self.stat.st_ctime == old.stat.st_ctime)
1495 1495 except AttributeError:
1496 1496 return False
1497 1497
1498 1498 def __ne__(self, other):
1499 1499 return not self == other
1500 1500
1501 1501 class atomictempfile(object):
1502 1502 '''writable file object that atomically updates a file
1503 1503
1504 1504 All writes will go to a temporary copy of the original file. Call
1505 1505 close() when you are done writing, and atomictempfile will rename
1506 1506 the temporary copy to the original name, making the changes
1507 1507 visible. If the object is destroyed without being closed, all your
1508 1508 writes are discarded.
1509 1509
1510 1510 The checkambig argument of the constructor is used with filestat,
1511 1511 and is useful only if the target file is guarded by a lock
1512 1512 (e.g. repo.lock or repo.wlock).
1513 1513 '''
1514 1514 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1515 1515 self.__name = name # permanent name
1516 1516 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1517 1517 createmode=createmode)
1518 1518 self._fp = posixfile(self._tempname, mode)
1519 1519 self._checkambig = checkambig
1520 1520
1521 1521 # delegated methods
1522 1522 self.read = self._fp.read
1523 1523 self.write = self._fp.write
1524 1524 self.seek = self._fp.seek
1525 1525 self.tell = self._fp.tell
1526 1526 self.fileno = self._fp.fileno
1527 1527
1528 1528 def close(self):
1529 1529 if not self._fp.closed:
1530 1530 self._fp.close()
1531 1531 filename = localpath(self.__name)
1532 1532 oldstat = self._checkambig and filestat(filename)
1533 1533 if oldstat and oldstat.stat:
1534 1534 rename(self._tempname, filename)
1535 1535 newstat = filestat(filename)
1536 1536 if newstat.isambig(oldstat):
1537 1537 # stat of changed file is ambiguous to original one
1538 1538 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1539 1539 os.utime(filename, (advanced, advanced))
1540 1540 else:
1541 1541 rename(self._tempname, filename)
1542 1542
1543 1543 def discard(self):
1544 1544 if not self._fp.closed:
1545 1545 try:
1546 1546 os.unlink(self._tempname)
1547 1547 except OSError:
1548 1548 pass
1549 1549 self._fp.close()
1550 1550
1551 1551 def __del__(self):
1552 1552 if safehasattr(self, '_fp'): # constructor actually did something
1553 1553 self.discard()
1554 1554
1555 1555 def __enter__(self):
1556 1556 return self
1557 1557
1558 1558 def __exit__(self, exctype, excvalue, traceback):
1559 1559 if exctype is not None:
1560 1560 self.discard()
1561 1561 else:
1562 1562 self.close()
1563 1563
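# A minimal illustrative sketch (not from the original source; 'data.txt' is
# a hypothetical file): used as a context manager, either every write becomes
# visible at once via rename, or (on an exception) none of them do:
#
#   with atomictempfile('data.txt') as fp:
#       fp.write('all or nothing\n')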
1564 1564 def makedirs(name, mode=None, notindexed=False):
1565 1565 """recursive directory creation with parent mode inheritance
1566 1566
1567 1567 Newly created directories are marked as "not to be indexed by
1568 1568 the content indexing service", if ``notindexed`` is specified
1569 1569 for "write" mode access.
1570 1570 """
1571 1571 try:
1572 1572 makedir(name, notindexed)
1573 1573 except OSError as err:
1574 1574 if err.errno == errno.EEXIST:
1575 1575 return
1576 1576 if err.errno != errno.ENOENT or not name:
1577 1577 raise
1578 1578 parent = os.path.dirname(os.path.abspath(name))
1579 1579 if parent == name:
1580 1580 raise
1581 1581 makedirs(parent, mode, notindexed)
1582 1582 try:
1583 1583 makedir(name, notindexed)
1584 1584 except OSError as err:
1585 1585 # Catch EEXIST to handle races
1586 1586 if err.errno == errno.EEXIST:
1587 1587 return
1588 1588 raise
1589 1589 if mode is not None:
1590 1590 os.chmod(name, mode)
1591 1591
1592 1592 def readfile(path):
1593 1593 with open(path, 'rb') as fp:
1594 1594 return fp.read()
1595 1595
1596 1596 def writefile(path, text):
1597 1597 with open(path, 'wb') as fp:
1598 1598 fp.write(text)
1599 1599
1600 1600 def appendfile(path, text):
1601 1601 with open(path, 'ab') as fp:
1602 1602 fp.write(text)
1603 1603
1604 1604 class chunkbuffer(object):
1605 1605 """Allow arbitrary sized chunks of data to be efficiently read from an
1606 1606 iterator over chunks of arbitrary size."""
1607 1607
1608 1608 def __init__(self, in_iter):
1609 1609 """in_iter is the iterator that's iterating over the input chunks."""
1611 1611 def splitbig(chunks):
1612 1612 for chunk in chunks:
1613 1613 if len(chunk) > 2**20:
1614 1614 pos = 0
1615 1615 while pos < len(chunk):
1616 1616 end = pos + 2 ** 18
1617 1617 yield chunk[pos:end]
1618 1618 pos = end
1619 1619 else:
1620 1620 yield chunk
1621 1621 self.iter = splitbig(in_iter)
1622 1622 self._queue = collections.deque()
1623 1623 self._chunkoffset = 0
1624 1624
1625 1625 def read(self, l=None):
1626 1626 """Read L bytes of data from the iterator of chunks of data.
1627 1627 Returns less than L bytes if the iterator runs dry.
1628 1628
1629 1629 If the size parameter is omitted, read everything."""
1630 1630 if l is None:
1631 1631 return ''.join(self.iter)
1632 1632
1633 1633 left = l
1634 1634 buf = []
1635 1635 queue = self._queue
1636 1636 while left > 0:
1637 1637 # refill the queue
1638 1638 if not queue:
1639 1639 target = 2**18
1640 1640 for chunk in self.iter:
1641 1641 queue.append(chunk)
1642 1642 target -= len(chunk)
1643 1643 if target <= 0:
1644 1644 break
1645 1645 if not queue:
1646 1646 break
1647 1647
1648 1648 # The easy way to do this would be to queue.popleft(), modify the
1649 1649 # chunk (if necessary), then queue.appendleft(). However, for cases
1650 1650 # where we read partial chunk content, this incurs 2 dequeue
1651 1651 # mutations and creates a new str for the remaining chunk in the
1652 1652 # queue. Our code below avoids this overhead.
1653 1653
1654 1654 chunk = queue[0]
1655 1655 chunkl = len(chunk)
1656 1656 offset = self._chunkoffset
1657 1657
1658 1658 # Use full chunk.
1659 1659 if offset == 0 and left >= chunkl:
1660 1660 left -= chunkl
1661 1661 queue.popleft()
1662 1662 buf.append(chunk)
1663 1663 # self._chunkoffset remains at 0.
1664 1664 continue
1665 1665
1666 1666 chunkremaining = chunkl - offset
1667 1667
1668 1668 # Use all of unconsumed part of chunk.
1669 1669 if left >= chunkremaining:
1670 1670 left -= chunkremaining
1671 1671 queue.popleft()
1672 1672 # offset == 0 is enabled by block above, so this won't merely
1673 1673 # copy via ``chunk[0:]``.
1674 1674 buf.append(chunk[offset:])
1675 1675 self._chunkoffset = 0
1676 1676
1677 1677 # Partial chunk needed.
1678 1678 else:
1679 1679 buf.append(chunk[offset:offset + left])
1680 1680 self._chunkoffset += left
1681 1681 left -= chunkremaining
1682 1682
1683 1683 return ''.join(buf)
1684 1684
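# A minimal illustrative sketch (not from the original source): chunkbuffer
# presents an iterator of unevenly sized chunks as a file-like read(n),
# tracking a read offset into the chunk at the front of the queue:
#
#   >>> buf = chunkbuffer(iter(['abc', 'defg', 'h']))
#   >>> buf.read(5)
#   'abcde'
#   >>> buf.read(5)
#   'fgh'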
1685 1685 def filechunkiter(f, size=65536, limit=None):
1686 1686 """Create a generator that produces the data in the file size
1687 1687 (default 65536) bytes at a time, up to optional limit (default is
1688 1688 to read all data). Chunks may be less than size bytes if the
1689 1689 chunk is the last chunk in the file, or the file is a socket or
1690 1690 some other type of file that sometimes reads less data than is
1691 1691 requested."""
1692 1692 assert size >= 0
1693 1693 assert limit is None or limit >= 0
1694 1694 while True:
1695 1695 if limit is None:
1696 1696 nbytes = size
1697 1697 else:
1698 1698 nbytes = min(limit, size)
1699 1699 s = nbytes and f.read(nbytes)
1700 1700 if not s:
1701 1701 break
1702 1702 if limit:
1703 1703 limit -= len(s)
1704 1704 yield s
1705 1705
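# A minimal illustrative sketch (not from the original source): with a limit,
# filechunkiter() stops after that many bytes even if more are available:
#
#   >>> import io
#   >>> list(filechunkiter(io.BytesIO('a' * 10), size=4, limit=6))
#   ['aaaa', 'aa']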
1706 1706 def makedate(timestamp=None):
1707 1707 '''Return a unix timestamp (or the current time) as a (unixtime,
1708 1708 offset) tuple based off the local timezone.'''
1709 1709 if timestamp is None:
1710 1710 timestamp = time.time()
1711 1711 if timestamp < 0:
1712 1712 hint = _("check your clock")
1713 1713 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1714 1714 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1715 1715 datetime.datetime.fromtimestamp(timestamp))
1716 1716 tz = delta.days * 86400 + delta.seconds
1717 1717 return timestamp, tz
1718 1718
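# A minimal illustrative sketch (not from the original source): the offset is
# the difference between the UTC and local renderings of the same instant, in
# seconds west of UTC, so on a host in UTC+9 (e.g. JST):
#
#   >>> makedate(0)
#   (0, -32400)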
1719 1719 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1720 1720 """represent a (unixtime, offset) tuple as a localized time.
1721 1721 unixtime is seconds since the epoch, and offset is the time zone's
1722 1722 number of seconds away from UTC.
1723 1723
1724 1724 >>> datestr((0, 0))
1725 1725 'Thu Jan 01 00:00:00 1970 +0000'
1726 1726 >>> datestr((42, 0))
1727 1727 'Thu Jan 01 00:00:42 1970 +0000'
1728 1728 >>> datestr((-42, 0))
1729 1729 'Wed Dec 31 23:59:18 1969 +0000'
1730 1730 >>> datestr((0x7fffffff, 0))
1731 1731 'Tue Jan 19 03:14:07 2038 +0000'
1732 1732 >>> datestr((-0x80000000, 0))
1733 1733 'Fri Dec 13 20:45:52 1901 +0000'
1734 1734 """
1735 1735 t, tz = date or makedate()
1736 1736 if "%1" in format or "%2" in format or "%z" in format:
1737 1737 sign = (tz > 0) and "-" or "+"
1738 1738 minutes = abs(tz) // 60
1739 1739 q, r = divmod(minutes, 60)
1740 1740 format = format.replace("%z", "%1%2")
1741 1741 format = format.replace("%1", "%c%02d" % (sign, q))
1742 1742 format = format.replace("%2", "%02d" % r)
1743 1743 d = t - tz
1744 1744 if d > 0x7fffffff:
1745 1745 d = 0x7fffffff
1746 1746 elif d < -0x80000000:
1747 1747 d = -0x80000000
1748 1748 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1749 1749 # because they use the gmtime() system call which is buggy on Windows
1750 1750 # for negative values.
1751 1751 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1752 1752 s = t.strftime(format)
1753 1753 return s
1754 1754
1755 1755 def shortdate(date=None):
1756 1756 """turn a (timestamp, tzoff) tuple into an ISO 8601 date."""
1757 1757 return datestr(date, format='%Y-%m-%d')
1758 1758
1759 1759 def parsetimezone(s):
1760 1760 """find a trailing timezone, if any, in string, and return a
1761 1761 (offset, remainder) pair"""
1762 1762
1763 1763 if s.endswith("GMT") or s.endswith("UTC"):
1764 1764 return 0, s[:-3].rstrip()
1765 1765
1766 1766 # Unix-style timezones [+-]hhmm
1767 1767 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1768 1768 sign = (s[-5] == "+") and 1 or -1
1769 1769 hours = int(s[-4:-2])
1770 1770 minutes = int(s[-2:])
1771 1771 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1772 1772
1773 1773 # ISO8601 trailing Z
1774 1774 if s.endswith("Z") and s[-2:-1].isdigit():
1775 1775 return 0, s[:-1]
1776 1776
1777 1777 # ISO8601-style [+-]hh:mm
1778 1778 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1779 1779 s[-5:-3].isdigit() and s[-2:].isdigit()):
1780 1780 sign = (s[-6] == "+") and 1 or -1
1781 1781 hours = int(s[-5:-3])
1782 1782 minutes = int(s[-2:])
1783 1783 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1784 1784
1785 1785 return None, s
1786 1786
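# A minimal illustrative sketch (not from the original source): the returned
# offset is in seconds west of UTC, paired with the unparsed remainder:
#
#   >>> parsetimezone('Fri Jun 10 14:15:16 2016 +0900')
#   (-32400, 'Fri Jun 10 14:15:16 2016')
#   >>> parsetimezone('2016-06-10T14:15Z')
#   (0, '2016-06-10T14:15')
#   >>> parsetimezone('no zone here')
#   (None, 'no zone here')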
1787 1787 def strdate(string, format, defaults=[]):
1788 1788 """parse a localized time string and return a (unixtime, offset) tuple.
1789 1789 if the string cannot be parsed, ValueError is raised."""
1790 1790 # NOTE: unixtime = localunixtime + offset
1791 1791 offset, date = parsetimezone(string)
1792 1792
1793 1793 # add missing elements from defaults
1794 1794 usenow = False # default to using biased defaults
1795 1795 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1796 1796 found = [True for p in part if ("%"+p) in format]
1797 1797 if not found:
1798 1798 date += "@" + defaults[part][usenow]
1799 1799 format += "@%" + part[0]
1800 1800 else:
1801 1801 # We've found a specific time element; less specific time
1802 1802 # elements are relative to today
1803 1803 usenow = True
1804 1804
1805 1805 timetuple = time.strptime(date, format)
1806 1806 localunixtime = int(calendar.timegm(timetuple))
1807 1807 if offset is None:
1808 1808 # local timezone
1809 1809 unixtime = int(time.mktime(timetuple))
1810 1810 offset = unixtime - localunixtime
1811 1811 else:
1812 1812 unixtime = localunixtime + offset
1813 1813 return unixtime, offset
1814 1814
1815 1815 def parsedate(date, formats=None, bias=None):
1816 1816 """parse a localized date/time and return a (unixtime, offset) tuple.
1817 1817
1818 1818 The date may be a "unixtime offset" string or in one of the specified
1819 1819 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1820 1820
1821 1821 >>> parsedate(' today ') == parsedate(\
1822 1822 datetime.date.today().strftime('%b %d'))
1823 1823 True
1824 1824 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1825 1825 datetime.timedelta(days=1)\
1826 1826 ).strftime('%b %d'))
1827 1827 True
1828 1828 >>> now, tz = makedate()
1829 1829 >>> strnow, strtz = parsedate('now')
1830 1830 >>> (strnow - now) < 1
1831 1831 True
1832 1832 >>> tz == strtz
1833 1833 True
1834 1834 """
1835 1835 if bias is None:
1836 1836 bias = {}
1837 1837 if not date:
1838 1838 return 0, 0
1839 1839 if isinstance(date, tuple) and len(date) == 2:
1840 1840 return date
1841 1841 if not formats:
1842 1842 formats = defaultdateformats
1843 1843 date = date.strip()
1844 1844
1845 1845 if date == 'now' or date == _('now'):
1846 1846 return makedate()
1847 1847 if date == 'today' or date == _('today'):
1848 1848 date = datetime.date.today().strftime('%b %d')
1849 1849 elif date == 'yesterday' or date == _('yesterday'):
1850 1850 date = (datetime.date.today() -
1851 1851 datetime.timedelta(days=1)).strftime('%b %d')
1852 1852
1853 1853 try:
1854 1854 when, offset = map(int, date.split(' '))
1855 1855 except ValueError:
1856 1856 # fill out defaults
1857 1857 now = makedate()
1858 1858 defaults = {}
1859 1859 for part in ("d", "mb", "yY", "HI", "M", "S"):
1860 1860 # this piece is for rounding the specific end of unknowns
1861 1861 b = bias.get(part)
1862 1862 if b is None:
1863 1863 if part[0] in "HMS":
1864 1864 b = "00"
1865 1865 else:
1866 1866 b = "0"
1867 1867
1868 1868 # this piece is for matching the generic end to today's date
1869 1869 n = datestr(now, "%" + part[0])
1870 1870
1871 1871 defaults[part] = (b, n)
1872 1872
1873 1873 for format in formats:
1874 1874 try:
1875 1875 when, offset = strdate(date, format, defaults)
1876 1876 except (ValueError, OverflowError):
1877 1877 pass
1878 1878 else:
1879 1879 break
1880 1880 else:
1881 1881 raise Abort(_('invalid date: %r') % date)
1882 1882 # validate explicit (probably user-specified) date and
1883 1883 # time zone offset. values must fit in signed 32 bits for
1884 1884 # current 32-bit linux runtimes. timezones go from UTC-12
1885 1885 # to UTC+14
1886 1886 if when < -0x80000000 or when > 0x7fffffff:
1887 1887 raise Abort(_('date exceeds 32 bits: %d') % when)
1888 1888 if offset < -50400 or offset > 43200:
1889 1889 raise Abort(_('impossible time zone offset: %d') % offset)
1890 1890 return when, offset
1891 1891
1892 1892 def matchdate(date):
1893 1893 """Return a function that matches a given date match specifier
1894 1894
1895 1895 Formats include:
1896 1896
1897 1897 '{date}' match a given date to the accuracy provided
1898 1898
1899 1899 '<{date}' on or before a given date
1900 1900
1901 1901 '>{date}' on or after a given date
1902 1902
1903 1903 >>> p1 = parsedate("10:29:59")
1904 1904 >>> p2 = parsedate("10:30:00")
1905 1905 >>> p3 = parsedate("10:30:59")
1906 1906 >>> p4 = parsedate("10:31:00")
1907 1907 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1908 1908 >>> f = matchdate("10:30")
1909 1909 >>> f(p1[0])
1910 1910 False
1911 1911 >>> f(p2[0])
1912 1912 True
1913 1913 >>> f(p3[0])
1914 1914 True
1915 1915 >>> f(p4[0])
1916 1916 False
1917 1917 >>> f(p5[0])
1918 1918 False
1919 1919 """
1920 1920
1921 1921 def lower(date):
1922 1922 d = {'mb': "1", 'd': "1"}
1923 1923 return parsedate(date, extendeddateformats, d)[0]
1924 1924
1925 1925 def upper(date):
1926 1926 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1927 1927 for days in ("31", "30", "29"):
1928 1928 try:
1929 1929 d["d"] = days
1930 1930 return parsedate(date, extendeddateformats, d)[0]
1931 1931 except Abort:
1932 1932 pass
1933 1933 d["d"] = "28"
1934 1934 return parsedate(date, extendeddateformats, d)[0]
1935 1935
1936 1936 date = date.strip()
1937 1937
1938 1938 if not date:
1939 1939 raise Abort(_("dates cannot consist entirely of whitespace"))
1940 1940 elif date[0] == "<":
1941 1941 if not date[1:]:
1942 1942 raise Abort(_("invalid day spec, use '<DATE'"))
1943 1943 when = upper(date[1:])
1944 1944 return lambda x: x <= when
1945 1945 elif date[0] == ">":
1946 1946 if not date[1:]:
1947 1947 raise Abort(_("invalid day spec, use '>DATE'"))
1948 1948 when = lower(date[1:])
1949 1949 return lambda x: x >= when
1950 1950 elif date[0] == "-":
1951 1951 try:
1952 1952 days = int(date[1:])
1953 1953 except ValueError:
1954 1954 raise Abort(_("invalid day spec: %s") % date[1:])
1955 1955 if days < 0:
1956 1956 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
1957 1957 % date[1:])
1958 1958 when = makedate()[0] - days * 3600 * 24
1959 1959 return lambda x: x >= when
1960 1960 elif " to " in date:
1961 1961 a, b = date.split(" to ")
1962 1962 start, stop = lower(a), upper(b)
1963 1963 return lambda x: x >= start and x <= stop
1964 1964 else:
1965 1965 start, stop = lower(date), upper(date)
1966 1966 return lambda x: x >= start and x <= stop
1967 1967
1968 1968 def stringmatcher(pattern):
1969 1969 """
1970 1970 accepts a string, possibly starting with 're:' or 'literal:' prefix.
1971 1971 returns the matcher name, pattern, and matcher function.
1972 1972 missing or unknown prefixes are treated as literal matches.
1973 1973
1974 1974 helper for tests:
1975 1975 >>> def test(pattern, *tests):
1976 1976 ... kind, pattern, matcher = stringmatcher(pattern)
1977 1977 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
1978 1978
1979 1979 exact matching (no prefix):
1980 1980 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
1981 1981 ('literal', 'abcdefg', [False, False, True])
1982 1982
1983 1983 regex matching ('re:' prefix)
1984 1984 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
1985 1985 ('re', 'a.+b', [False, False, True])
1986 1986
1987 1987 force exact matches ('literal:' prefix)
1988 1988 >>> test('literal:re:foobar', 'foobar', 're:foobar')
1989 1989 ('literal', 're:foobar', [False, True])
1990 1990
1991 1991 unknown prefixes are ignored and treated as literals
1992 1992 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
1993 1993 ('literal', 'foo:bar', [False, False, True])
1994 1994 """
1995 1995 if pattern.startswith('re:'):
1996 1996 pattern = pattern[3:]
1997 1997 try:
1998 1998 regex = remod.compile(pattern)
1999 1999 except remod.error as e:
2000 2000 raise error.ParseError(_('invalid regular expression: %s')
2001 2001 % e)
2002 2002 return 're', pattern, regex.search
2003 2003 elif pattern.startswith('literal:'):
2004 2004 pattern = pattern[8:]
2005 2005 return 'literal', pattern, pattern.__eq__
2006 2006
2007 2007 def shortuser(user):
2008 2008 """Return a short representation of a user name or email address."""
2009 2009 f = user.find('@')
2010 2010 if f >= 0:
2011 2011 user = user[:f]
2012 2012 f = user.find('<')
2013 2013 if f >= 0:
2014 2014 user = user[f + 1:]
2015 2015 f = user.find(' ')
2016 2016 if f >= 0:
2017 2017 user = user[:f]
2018 2018 f = user.find('.')
2019 2019 if f >= 0:
2020 2020 user = user[:f]
2021 2021 return user
2022 2022
2023 2023 def emailuser(user):
2024 2024 """Return the user portion of an email address."""
2025 2025 f = user.find('@')
2026 2026 if f >= 0:
2027 2027 user = user[:f]
2028 2028 f = user.find('<')
2029 2029 if f >= 0:
2030 2030 user = user[f + 1:]
2031 2031 return user
2032 2032
2033 2033 def email(author):
2034 2034 '''get email of author.'''
2035 2035 r = author.find('>')
2036 2036 if r == -1:
2037 2037 r = None
2038 2038 return author[author.find('<') + 1:r]
2039 2039
2040 2040 def ellipsis(text, maxlength=400):
2041 2041 """Trim string to at most maxlength (default: 400) columns in display."""
2042 2042 return encoding.trim(text, maxlength, ellipsis='...')
2043 2043
2044 2044 def unitcountfn(*unittable):
2045 2045 '''return a function that renders a readable count of some quantity'''
2046 2046
2047 2047 def go(count):
2048 2048 for multiplier, divisor, format in unittable:
2049 2049 if count >= divisor * multiplier:
2050 2050 return format % (count / float(divisor))
2051 2051 return unittable[-1][2] % count
2052 2052
2053 2053 return go
2054 2054
2055 2055 bytecount = unitcountfn(
2056 2056 (100, 1 << 30, _('%.0f GB')),
2057 2057 (10, 1 << 30, _('%.1f GB')),
2058 2058 (1, 1 << 30, _('%.2f GB')),
2059 2059 (100, 1 << 20, _('%.0f MB')),
2060 2060 (10, 1 << 20, _('%.1f MB')),
2061 2061 (1, 1 << 20, _('%.2f MB')),
2062 2062 (100, 1 << 10, _('%.0f KB')),
2063 2063 (10, 1 << 10, _('%.1f KB')),
2064 2064 (1, 1 << 10, _('%.2f KB')),
2065 2065 (1, 1, _('%.0f bytes')),
2066 2066 )
2067 2067
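# A minimal illustrative sketch (not from the original source, assuming the
# untranslated C locale): bytecount picks the first row whose threshold the
# count reaches, so displayed precision drops as magnitude grows:
#
#   >>> bytecount(100)
#   '100 bytes'
#   >>> bytecount(42.5 * (1 << 20))
#   '42.5 MB'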
2068 2068 def uirepr(s):
2069 2069 # Avoid double backslash in Windows path repr()
2070 2070 return repr(s).replace('\\\\', '\\')
2071 2071
2072 2072 # delay import of textwrap
2073 2073 def MBTextWrapper(**kwargs):
2074 2074 class tw(textwrap.TextWrapper):
2075 2075 """
2076 2076 Extend TextWrapper for width-awareness.
2077 2077
2078 2078 Neither the number of 'bytes' in any encoding nor the number of
2079 2079 'characters' is appropriate for calculating the terminal columns
2080 2080 occupied by a given string.
2081 2081
2082 2082 The original TextWrapper implementation uses the built-in 'len()'
2083 2083 directly, so overriding is needed to use the display width of
2084 2084 each character.
2085 2085
2086 2086 In addition, characters classified as 'ambiguous' width are treated
2087 2087 as wide in East Asian locales, but as narrow elsewhere; this requires a per-use decision to determine their width.
2088 2088 """
2089 2089 def _cutdown(self, ucstr, space_left):
2090 2090 l = 0
2091 2091 colwidth = encoding.ucolwidth
2092 2092 for i in xrange(len(ucstr)):
2093 2093 l += colwidth(ucstr[i])
2094 2094 if space_left < l:
2095 2095 return (ucstr[:i], ucstr[i:])
2096 2096 return ucstr, ''
2097 2097
2098 2098 # overriding of base class
2099 2099 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2100 2100 space_left = max(width - cur_len, 1)
2101 2101
2102 2102 if self.break_long_words:
2103 2103 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2104 2104 cur_line.append(cut)
2105 2105 reversed_chunks[-1] = res
2106 2106 elif not cur_line:
2107 2107 cur_line.append(reversed_chunks.pop())
2108 2108
2109 2109 # this overriding code is imported from TextWrapper of Python 2.6
2110 2110 # to calculate columns of string by 'encoding.ucolwidth()'
2111 2111 def _wrap_chunks(self, chunks):
2112 2112 colwidth = encoding.ucolwidth
2113 2113
2114 2114 lines = []
2115 2115 if self.width <= 0:
2116 2116 raise ValueError("invalid width %r (must be > 0)" % self.width)
2117 2117
2118 2118 # Arrange in reverse order so items can be efficiently popped
2119 2119 # from a stack of chunks.
2120 2120 chunks.reverse()
2121 2121
2122 2122 while chunks:
2123 2123
2124 2124 # Start the list of chunks that will make up the current line.
2125 2125 # cur_len is just the length of all the chunks in cur_line.
2126 2126 cur_line = []
2127 2127 cur_len = 0
2128 2128
2129 2129 # Figure out which static string will prefix this line.
2130 2130 if lines:
2131 2131 indent = self.subsequent_indent
2132 2132 else:
2133 2133 indent = self.initial_indent
2134 2134
2135 2135 # Maximum width for this line.
2136 2136 width = self.width - len(indent)
2137 2137
2138 2138 # First chunk on line is whitespace -- drop it, unless this
2139 2139 # is the very beginning of the text (i.e. no lines started yet).
2140 2140 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2141 2141 del chunks[-1]
2142 2142
2143 2143 while chunks:
2144 2144 l = colwidth(chunks[-1])
2145 2145
2146 2146 # Can at least squeeze this chunk onto the current line.
2147 2147 if cur_len + l <= width:
2148 2148 cur_line.append(chunks.pop())
2149 2149 cur_len += l
2150 2150
2151 2151 # Nope, this line is full.
2152 2152 else:
2153 2153 break
2154 2154
2155 2155 # The current line is full, and the next chunk is too big to
2156 2156 # fit on *any* line (not just this one).
2157 2157 if chunks and colwidth(chunks[-1]) > width:
2158 2158 self._handle_long_word(chunks, cur_line, cur_len, width)
2159 2159
2160 2160 # If the last chunk on this line is all whitespace, drop it.
2161 2161 if (self.drop_whitespace and
2162 2162 cur_line and cur_line[-1].strip() == ''):
2163 2163 del cur_line[-1]
2164 2164
2165 2165 # Convert current line back to a string and store it in list
2166 2166 # of all lines (return value).
2167 2167 if cur_line:
2168 2168 lines.append(indent + ''.join(cur_line))
2169 2169
2170 2170 return lines
2171 2171
2172 2172 global MBTextWrapper
2173 2173 MBTextWrapper = tw
2174 2174 return tw(**kwargs)
2175 2175
2176 2176 def wrap(line, width, initindent='', hangindent=''):
2177 2177 maxindent = max(len(hangindent), len(initindent))
2178 2178 if width <= maxindent:
2179 2179 # adjust for weird terminal size
2180 2180 width = max(78, maxindent + 1)
2181 2181 line = line.decode(encoding.encoding, encoding.encodingmode)
2182 2182 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2183 2183 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2184 2184 wrapper = MBTextWrapper(width=width,
2185 2185 initial_indent=initindent,
2186 2186 subsequent_indent=hangindent)
2187 2187 return wrapper.fill(line).encode(encoding.encoding)
2188 2188
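# A minimal illustrative sketch (not from the original source): wrap() fills
# text by display columns, with separate first-line and hanging indents:
#
#   >>> wrap('a b c d e f', width=5, initindent='', hangindent='  ')
#   'a b c\n  d e\n  f'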
2189 2189 def iterlines(iterator):
2190 2190 for chunk in iterator:
2191 2191 for line in chunk.splitlines():
2192 2192 yield line
2193 2193
2194 2194 def expandpath(path):
2195 2195 return os.path.expanduser(os.path.expandvars(path))
2196 2196
2197 2197 def hgcmd():
2198 2198 """Return the command used to execute current hg
2199 2199
2200 2200 This is different from hgexecutable() because on Windows we want
2201 2201 to avoid things like batch files that open new shell windows, so
2202 2202 we return either the python invocation or the current executable.
2203 2203 """
2204 2204 if mainfrozen():
2205 2205 if getattr(sys, 'frozen', None) == 'macosx_app':
2206 2206 # Env variable set by py2app
2207 2207 return [os.environ['EXECUTABLEPATH']]
2208 2208 else:
2209 2209 return [sys.executable]
2210 2210 return gethgcmd()
2211 2211
2212 2212 def rundetached(args, condfn):
2213 2213 """Execute the argument list in a detached process.
2214 2214
2215 2215 condfn is a callable which is called repeatedly and should return
2216 2216 True once the child process is known to have started successfully.
2217 2217 At this point, the child process PID is returned. If the child
2218 2218 process fails to start or finishes before condfn() evaluates to
2219 2219 True, return -1.
2220 2220 """
2221 2221 # Windows case is easier because the child process is either
2222 2222 # successfully starting and validating the condition or exiting
2223 2223 # on failure. We just poll on its PID. On Unix, if the child
2224 2224 # process fails to start, it will be left in a zombie state until
2225 2225 # the parent waits on it, which we cannot do since we expect a
2226 2226 # long-running process on success. Instead we listen for SIGCHLD telling
2227 2227 # us our child process terminated.
2228 2228 terminated = set()
2229 2229 def handler(signum, frame):
2230 2230 terminated.add(os.wait())
2231 2231 prevhandler = None
2232 2232 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2233 2233 if SIGCHLD is not None:
2234 2234 prevhandler = signal.signal(SIGCHLD, handler)
2235 2235 try:
2236 2236 pid = spawndetached(args)
2237 2237 while not condfn():
2238 2238 if ((pid in terminated or not testpid(pid))
2239 2239 and not condfn()):
2240 2240 return -1
2241 2241 time.sleep(0.1)
2242 2242 return pid
2243 2243 finally:
2244 2244 if prevhandler is not None:
2245 2245 signal.signal(signal.SIGCHLD, prevhandler)
2246 2246
2247 2247 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2248 2248 """Return the result of interpolating items in the mapping into string s.
2249 2249
2250 2250 prefix is a single character string, or a two character string with
2251 2251 a backslash as the first character if the prefix needs to be escaped in
2252 2252 a regular expression.
2253 2253
2254 2254 fn is an optional function that will be applied to the replacement text
2255 2255 just before replacement.
2256 2256
2257 2257 escape_prefix is an optional flag that allows using doubled prefix for
2258 2258 its escaping.
2259 2259 """
2260 2260 fn = fn or (lambda s: s)
2261 2261 patterns = '|'.join(mapping.keys())
2262 2262 if escape_prefix:
2263 2263 patterns += '|' + prefix
2264 2264 if len(prefix) > 1:
2265 2265 prefix_char = prefix[1:]
2266 2266 else:
2267 2267 prefix_char = prefix
2268 2268 mapping[prefix_char] = prefix_char
2269 2269 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2270 2270 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2271 2271
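# A minimal illustrative sketch (not from the original source): with
# escape_prefix=True a doubled prefix escapes itself, and every mapped key is
# replaced in a single regex pass:
#
#   >>> interpolate('%', {'user': 'alice'}, 'hi %user, 100%% done',
#   ...             escape_prefix=True)
#   'hi alice, 100% done'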
2272 2272 def getport(port):
2273 2273 """Return the port for a given network service.
2274 2274
2275 2275 If port is an integer, it's returned as is. If it's a string, it's
2276 2276 looked up using socket.getservbyname(). If there's no matching
2277 2277 service, error.Abort is raised.
2278 2278 """
2279 2279 try:
2280 2280 return int(port)
2281 2281 except ValueError:
2282 2282 pass
2283 2283
2284 2284 try:
2285 2285 return socket.getservbyname(port)
2286 2286 except socket.error:
2287 2287 raise Abort(_("no port number associated with service '%s'") % port)
2288 2288
2289 2289 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2290 2290 '0': False, 'no': False, 'false': False, 'off': False,
2291 2291 'never': False}
2292 2292
2293 2293 def parsebool(s):
2294 2294 """Parse s into a boolean.
2295 2295
2296 2296 If s is not a valid boolean, returns None.
2297 2297 """
2298 2298 return _booleans.get(s.lower(), None)
2299 2299
2300 2300 _hexdig = '0123456789ABCDEFabcdef'
2301 2301 _hextochr = dict((a + b, chr(int(a + b, 16)))
2302 2302 for a in _hexdig for b in _hexdig)
2303 2303
2304 2304 def _urlunquote(s):
2305 2305 """Decode HTTP/HTML % encoding.
2306 2306
2307 2307 >>> _urlunquote('abc%20def')
2308 2308 'abc def'
2309 2309 """
2310 2310 res = s.split('%')
2311 2311 # fastpath
2312 2312 if len(res) == 1:
2313 2313 return s
2314 2314 s = res[0]
2315 2315 for item in res[1:]:
2316 2316 try:
2317 2317 s += _hextochr[item[:2]] + item[2:]
2318 2318 except KeyError:
2319 2319 s += '%' + item
2320 2320 except UnicodeDecodeError:
2321 2321 s += unichr(int(item[:2], 16)) + item[2:]
2322 2322 return s
2323 2323
2324 2324 class url(object):
2325 2325 r"""Reliable URL parser.
2326 2326
2327 2327 This parses URLs and provides attributes for the following
2328 2328 components:
2329 2329
2330 2330 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2331 2331
2332 2332 Missing components are set to None. The only exception is
2333 2333 fragment, which is set to '' if present but empty.
2334 2334
2335 2335 If parsefragment is False, fragment is included in query. If
2336 2336 parsequery is False, query is included in path. If both are
2337 2337 False, both fragment and query are included in path.
2338 2338
2339 2339 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2340 2340
2341 2341 Note that for backward compatibility reasons, bundle URLs do not
2342 2342 take host names. That means 'bundle://../' has a path of '../'.
2343 2343
2344 2344 Examples:
2345 2345
2346 2346 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2347 2347 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2348 2348 >>> url('ssh://[::1]:2200//home/joe/repo')
2349 2349 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2350 2350 >>> url('file:///home/joe/repo')
2351 2351 <url scheme: 'file', path: '/home/joe/repo'>
2352 2352 >>> url('file:///c:/temp/foo/')
2353 2353 <url scheme: 'file', path: 'c:/temp/foo/'>
2354 2354 >>> url('bundle:foo')
2355 2355 <url scheme: 'bundle', path: 'foo'>
2356 2356 >>> url('bundle://../foo')
2357 2357 <url scheme: 'bundle', path: '../foo'>
2358 2358 >>> url(r'c:\foo\bar')
2359 2359 <url path: 'c:\\foo\\bar'>
2360 2360 >>> url(r'\\blah\blah\blah')
2361 2361 <url path: '\\\\blah\\blah\\blah'>
2362 2362 >>> url(r'\\blah\blah\blah#baz')
2363 2363 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2364 2364 >>> url(r'file:///C:\users\me')
2365 2365 <url scheme: 'file', path: 'C:\\users\\me'>
2366 2366
2367 2367 Authentication credentials:
2368 2368
2369 2369 >>> url('ssh://joe:xyz@x/repo')
2370 2370 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2371 2371 >>> url('ssh://joe@x/repo')
2372 2372 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2373 2373
2374 2374 Query strings and fragments:
2375 2375
2376 2376 >>> url('http://host/a?b#c')
2377 2377 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2378 2378 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2379 2379 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2380 2380 """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)
            if not path:
                path = None

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(r'\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, _urlunquote(v))

    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it; the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def hidepassword(u):
    '''hide user credential in a url string
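
    Illustrative doctest, added here (the password is re-quoted by
    url.__str__, and '*' is in _safechars, so '***' survives unchanged):

    >>> hidepassword('http://user:secret@host/repo')
    'http://user:***@host/repo'
    '''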
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return str(u)

def removeauth(u):
    '''remove all authentication information from a url string
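
    Illustrative doctest, added here:

    >>> removeauth('http://user:secret@host/repo')
    'http://host/repo'
    '''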
    u = url(u)
    u.user = u.passwd = None
    return str(u)

def isatty(fp):
    try:
        return fp.isatty()
    except AttributeError:
        return False

timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
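
# Added note for illustration: assuming unitcountfn (defined earlier in
# this module) picks the first (multiplier, divisor, format) row whose
# threshold multiplier * divisor is met, e.g.:
#   timecount(2.5)   -> '2.500 s'
#   timecount(0.042) -> '42.00 ms'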

_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = time.time()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.time() - start
            _timenesting[0] -= indent
            sys.stderr.write('%s%s: %s\n' %
                             (' ' * _timenesting[0], func.__name__,
                              timecount(elapsed)))
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
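
    Surrounding whitespace and case are ignored (doctest added for
    illustration):

    >>> sizetoint(' 1.5 GB ')
    1610612736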
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.
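
    A minimal doctest, added for illustration:

    >>> h = hooks()
    >>> calls = []
    >>> h.add('b-source', lambda: calls.append('b'))
    >>> h.add('a-source', lambda: calls.append('a'))
    >>> h()
    [None, None]
    >>> calls
    ['a', 'b']
    '''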

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    '''
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0, f=sys.stderr, otherf=sys.stdout):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' last entries. By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg)
    for line in getstackframes(skip + 1):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest
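
    A small doctest, added to illustrate the pure-Python class below (note
    that a C implementation may replace it; see the safehasattr check after
    this class):

    >>> d = dirs(['a/b/c', 'a/b/d'])
    >>> 'a/b' in d
    True
    >>> d.delpath('a/b/c')
    >>> 'a/b' in d
    True
    >>> d.delpath('a/b/d')
    >>> 'a/b' in d
    False
    '''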

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return self._dirs.iterkeys()

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
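
# Added for illustration (not part of the original module):
#   list(finddirs('a/b/c')) == ['a/b', 'a']   # deepest directory first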

# compression utility

class nocompress(object):
    def compress(self, x):
        return x
    def flush(self):
        return ""

compressors = {
    None: nocompress,
    # lambda to prevent early import
    'BZ': lambda: bz2.BZ2Compressor(),
    'GZ': lambda: zlib.compressobj(),
    }
# also support the old form as a courtesy
compressors['UN'] = compressors[None]

def _makedecompressor(decompcls):
    def generator(f):
        d = decompcls()
        for chunk in filechunkiter(f):
            yield d.decompress(chunk)
    def func(fh):
        return chunkbuffer(generator(fh))
    return func

class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once. This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
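
    A usage sketch (added for illustration; the file names are hypothetical
    and open() stands in for any no-argument context-manager factory):

    with ctxmanager(lambda: open('a.txt'), lambda: open('b.txt')) as c:
        fa, fb = c.enter()
        # use fa and fb; both are closed when the block exits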
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers. These will be invoked when enter() is called.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits. The
        ordering of multiple atexit calls is unspecified, save that
        they will happen before any __exit__ functions.'''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed

def _bz2():
    d = bz2.BZ2Decompressor()
    # Bzip2 streams start with 'BZ', but we stripped it;
    # put it back for good measure.
    d.decompress('BZ')
    return d

decompressors = {None: lambda fh: fh,
                 '_truncatedBZ': _makedecompressor(_bz2),
                 'BZ': _makedecompressor(lambda: bz2.BZ2Decompressor()),
                 'GZ': _makedecompressor(lambda: zlib.decompressobj()),
                }
# also support the old form as a courtesy
decompressors['UN'] = decompressors[None]
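
# A round-trip sketch through the tables above (added for illustration; the
# helper is hypothetical, never called, and assumes the filechunkiter and
# chunkbuffer helpers defined earlier in this module, including a no-argument
# chunkbuffer.read() that drains the whole stream):
def _compressionroundtrip():
    import cStringIO
    comp = compressors['GZ']()                      # zlib.compressobj()
    data = comp.compress('payload') + comp.flush()  # zlib-format bytes
    # decompressors['GZ'] wraps a file-like object of compressed chunks
    # and returns a buffer yielding the decompressed stream
    return decompressors['GZ'](cStringIO.StringIO(data)).read()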

# convenient shortcut
dst = debugstacktrace