##// END OF EJS Templates
py3: replace pycompat.getenv with encoding.environ.get...
Pulkit Goyal -
r30820:6a70cf94 default
parent child Browse files
Show More
@@ -1,691 +1,692
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
if sys.version_info[0] < 3:
    opentext = open
else:
    # on Python 3, read files as ASCII so any non-ASCII byte raises
    # UnicodeDecodeError, which checkfile() reports and skips
    def opentext(f):
        return open(f, encoding='ascii')
try:
    xrange
except NameError:
    # Python 3 dropped xrange; alias it so the rest of the file is 2/3 safe
    xrange = range
try:
    import re2
except ImportError:
    # re2 is an optional accelerated regex engine; compilere() falls
    # back to the stdlib re module when it is unavailable
    re2 = None
42 42
def compilere(pat, multiline=False):
    """Compile a regex, preferring the optional re2 engine.

    With multiline=True the pattern is compiled with the (?m) flag.
    Patterns rejected by re2 silently fall back to the stdlib engine.
    """
    source = pat if not multiline else '(?m)' + pat
    if not re2:
        return re.compile(source)
    try:
        return re2.compile(source)
    except re2.error:
        return re.compile(source)
52 52
53 53 # check "rules depending on implementation of repquote()" in each
54 54 # patterns (especially pypats), before changing around repquote()
55 55 _repquotefixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q',
56 56 '%': '%', '\\': 'b', '*': 'A', '+': 'P', '-': 'M'}
57 57 def _repquoteencodechr(i):
58 58 if i > 255:
59 59 return 'u'
60 60 c = chr(i)
61 61 if c in _repquotefixedmap:
62 62 return _repquotefixedmap[c]
63 63 if c.isalpha():
64 64 return 'x'
65 65 if c.isdigit():
66 66 return 'n'
67 67 return 'o'
68 68 _repquotett = ''.join(_repquoteencodechr(i) for i in xrange(256))
69 69
70 70 def repquote(m):
71 71 t = m.group('text')
72 72 t = t.translate(_repquotett)
73 73 return m.group('quote') + t + m.group('quote')
74 74
def reppython(m):
    """Blank out Python comments and string literals for checking.

    A comment becomes a run of '#' of the same visible length (trailing
    whitespace preserved); quoted strings are delegated to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    visible = len(comment.rstrip())
    return "#" * visible + comment[visible:]
81 81
def repcomment(m):
    """Replace a comment body with '#' placeholders, keeping the indent."""
    indent, body = m.group(1), m.group(2)
    return indent + "#" * len(body)
84 84
def repccomment(m):
    """Blank a C comment body with 'x', preserving post-newline indent."""
    body = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return m.group(1) + body + "*/"
88 88
def repcallspaces(m):
    """Strip leading whitespace from continuation lines of a call."""
    rest = re.sub(r"\n\s+", "\n", m.group(2))
    return m.group(1) + rest
92 92
def repinclude(m):
    """Replace an #include target with the neutral placeholder <foo>."""
    return m.group(1) + "<foo>"
95 95
def rephere(m):
    """Blank out the body of a shell here-document with 'x'."""
    body = re.sub(r"\S", "x", m.group(2))
    return m.group(1) + body
99 99
100 100
# Rules applied to legacy shell test scripts.  First sub-list: errors;
# second sub-list: warnings.  Each entry is (regex, message[, ignore-regex]).
testpats = [
  [
    (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
    (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
    (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'/bin/', "don't use explicit paths for tools"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export .*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'^( *)\t', "don't use tabs to indent"),
    (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
     "put a backslash-escaped newline after sed 'i' command"),
    (r'^diff *-\w*[uU].*$\n(^  \$ |^$)', "prefix diff -u/-U with cmp"),
    (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
    (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'\|&', "don't use |&, use 2>&1"),
    (r'\w =  +\w', "only one space after = allowed"),
    (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
    (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
    (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]
157 157
# Filters applied to shell tests before pattern matching: comment text
# and here-document bodies are blanked so rules don't fire inside them.
testfilters = [
    (r"( *)(#([^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
162 162
# Shared message for rules whose output lines need a (glob) marker to
# also match Windows path separators.
winglobmsg = "use (glob) to match Windows paths too"
# Prefix matching a unified-test command line ("  $ cmd").
uprefix = r"^  \$ "
# Rules applied to unified (.t) tests; [errors, warnings], each entry is
# (regex, message[, ignore-regex]).
utestpats = [
  [
    (r'^(\S.*||  [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
    (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
     "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
     '# no-msys'), # in test-pull.t which is skipped on windows
    (r'^  saved backup bundle to \$TESTTMP.*\.hg$', winglobmsg),
    (r'^  changeset .* references (corrupted|missing) \$TESTTMP/.*[^)]$',
     winglobmsg),
    (r'^  pulling from \$TESTTMP/.*[^)]$', winglobmsg,
     '\$TESTTMP/unix-repo$'), # in test-issue1802.t which skipped on windows
    (r'^  reverting (?!subrepo ).*/.*[^)]$', winglobmsg),
    (r'^  cloning subrepo \S+/.*[^)]$', winglobmsg),
    (r'^  pushing to \$TESTTMP/.*[^)]$', winglobmsg),
    (r'^  pushing subrepo \S+/\S+ to.*[^)]$', winglobmsg),
    (r'^  moving \S+/.*[^)]$', winglobmsg),
    (r'^  no changes made to subrepo since.*/.*[^)]$', winglobmsg),
    (r'^  .*: largefile \S+ not available from file:.*/.*[^)]$', winglobmsg),
    (r'^  .*file://\$TESTTMP',
     'write "file:/*/$TESTTMP" + (glob) to match on windows too'),
    (r'^  [^$>].*27\.0\.0\.1.*[^)]$',
     'use (glob) to match localhost IP on hosts without 127.0.0.1 too'),
    (r'^  (cat|find): .*: No such file or directory',
     'use test -f to test for file existence'),
    (r'^  diff -[^ -]*p',
     "don't use (external) diff with -p for portability"),
    (r'^  [-+][-+][-+] .* [-+]0000 \(glob\)',
     "glob timezone field in diff output for portability"),
    (r'^  @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
     "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
    (r'^  @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
     "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
    (r'^  @@ -[0-9]+ [+][0-9]+ @@',
     "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
    (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
     r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
     "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
  ],
  # warnings
  [
    (r'^  (?!.*127\.0\.0\.1)[^*?/\n]* \(glob\)$',
     "glob match with no glob string (?, *, /, and 127.0.0.1)"),
  ]
]
217 217
# Derive unified-test variants of every shell rule: each pattern is
# re-anchored so it only fires on test-script command lines ($ or >).
for section in [0, 1]:
    for rule in testpats[section]:
        pattern, message = rule[0], rule[1]
        if pattern.startswith(r'^'):
            pattern = r"^  [$>] (%s)" % pattern[1:]
        else:
            pattern = r"^  [$>] .*(%s)" % pattern
        utestpats[section].append((pattern, message) + rule[2:])
227 227
# Filters for unified tests: blank here-document bodies (with "> "
# continuation lines) and comments before checking.
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n  > \1)", rephere),
    (r"( +)(#([^\n]*\S)?)", repcomment),
]
232 232
# Rules for Python sources; [errors, warnings].  Entries are
# (regex, message[, ignore-regex]).  The last group of rules matches
# against text already rewritten by repquote()/reppython() — see the
# "rules depending on implementation of repquote()" comments.
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
     'dict-from-generator'),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
    (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    (r'.{81}', "line too long"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    (r'^\s+(self\.)?[A-za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers"),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
     r'#.*old-style'),
    (r'class\s[^( \n]+\(\):',
     "class foo() creates old style object, use class foo(object)",
     r'#.*old-style'),
    (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
                            if k not in ('print', 'exec')),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'\([^()]*( =[^=]|[^<>!=]= )',
     "no whitespace around = for named parameters"),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'(?:(?<!def)\s+|\()hasattr\(',
     'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
     'legacy exception syntax; use "as" instead of ","'),
    (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
    (r'\b__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
    (r'os\.path\.join\(.*, *(""|\'\')\)',
     "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
    (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
    # XXX only catch mutable arguments on the first line of the definition
    (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
    (r'^import Queue', "don't use Queue, use util.queue + util.empty"),
    (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
    (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
    (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
    (r'^import urlparse', "don't use urlparse, use util.urlparse"),
    (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
    (r'^import cPickle', "don't use cPickle, use util.pickle"),
    (r'^import pickle', "don't use pickle, use util.pickle"),
    (r'^import httplib', "don't use httplib, use util.httplib"),
    (r'^import BaseHTTPServer', "use util.httpserver instead"),
    (r'\.next\(\)', "don't use .next(), use next(...)"),

    # rules depending on implementation of repquote()
    (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
     'string join across lines with no space'),
    (r'''(?x)ui\.(status|progress|write|note|warn)\(
         [ \t\n#]*
         (?# any strings/comments might precede a string, which
           # contains translatable message)
         ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
         (?# sequence consisting of below might precede translatable message
           # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
           # - escaped character: "\\", "\n", "\0" ...
           # - character other than '%', 'b' as '\', and 'x' as alphabet)
         (['"]|\'\'\'|""")
         ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
         (?# this regexp can't use [^...] style,
           # because _preparepats forcibly adds "\n" into [^...],
           # even though this regexp wants match it against "\n")''',
     "missing _() in ui message (use () to hide false-positives)"),
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
    (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
  ]
]
366 366
# Filter that blanks Python comments and string-literal bodies (via
# reppython) so pypats rules don't fire inside them.
pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
     ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
      (?P<text>(([^\\]|\\.)*?))
      (?P=quote))""", reppython),
]
373 373
# No pre-filtering is needed for plain .txt documentation files.
txtfilters = []

# Rules for .txt files; [errors, warnings].  Raw strings are used so
# '\s' and '\n' are regex escapes rather than (invalid) string escapes.
txtpats = [
  [
    (r'\s$', 'trailing whitespace'),
    (r'.. note::[ \n][^\n]', 'add two newlines after note::')
  ],
  []
]
383 383
# Rules for C sources; [errors, warnings].  Mercurial's C style uses
# tab indentation, hence the "don't use spaces to indent" rule.
cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'^  ', "don't use spaces to indent"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'[^;] \)', "no space before )"),
    (r'[)][{]', "space between ) and {"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
    (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
    (r'strcat\(', "don't use strcat"),

    # rules depending on implementation of repquote()
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
  ]
]
415 415
# Filters for C sources: blank comments, string literal bodies,
# #include targets, and call-continuation whitespace before checking.
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
422 422
# Layering rule for mercurial/util.py: util must stay below ui.
inutilpats = [
  [
    (r'\bui\.', "don't use ui in util"),
  ],
  # warnings
  []
]
430 430
# Layering rule for mercurial/revlog.py: revlog must stay below repo.
inrevlogpats = [
  [
    (r'\brepo\.', "don't use repo in revlog"),
  ],
  # warnings
  []
]
438 438
# hgweb template files need no pre-filtering.
webtemplatefilters = []

# Rules for hgweb templates (warnings only; no error-level rules).
webtemplatepats = [
  [],
  [
    (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
     'follow desc keyword with either firstline or websub'),
  ]
]
448 448
# Files checked by the catch-all rules need no pre-filtering.
allfilesfilters = []

# Rules applied to every file except .po catalogs.  The dots in the
# domain are escaped so only the literal host "selenic.com" matches,
# avoiding false positives from '.' acting as a wildcard.
allfilespats = [
  [
    (r'(http|https)://[a-zA-Z0-9./]*selenic\.com/',
     'use mercurial-scm.org domain URL'),
  ],
  # warnings
  [],
]
459 459
# Python 3 porting rules for core/hgext modules.  This resolves the
# diff residue in the source view to the post-commit state: the old
# "use pycompat.osgetenv" rule for os.getenv is replaced by the
# encoding.environ.get rule, and an os.setenv rule is added (per the
# commit "py3: replace pycompat.getenv with encoding.environ.get").
py3pats = [
  [
    (r'os\.environ', "use encoding.environ instead (py3)"),
    (r'os\.name', "use pycompat.osname instead (py3)"),
    (r'os\.getcwd', "use pycompat.getcwd instead (py3)"),
    (r'os\.sep', "use pycompat.ossep instead (py3)"),
    (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
    (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
    (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
    (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
    (r'os\.getenv', "use encoding.environ.get instead"),
    (r'os\.setenv', "modifying the environ dict is not preferred"),
  ],
  # warnings
  [],
]
475 476
# Master table driving checkfile(): each entry is
# (name, filename-regex, magic-first-bytes-regex, filters, [errors, warnings]).
# A file is checked when its name matches the regex or its content
# matches the magic regex.
checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python 3', r'.*(hgext|mercurial).*(?<!pycompat)\.py', '',
     pyfilters, py3pats),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
     pyfilters, inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    ('web template', r'mercurial/templates/.*\.tmpl', '',
     webtemplatefilters, webtemplatepats),
    ('all except for .po', r'.*(?<!\.po)$', '',
     allfilesfilters, allfilespats),
]
493 494
def _preparepats():
    """Compile every pattern in the global 'checks' table, in place.

    Patterns are rewritten first so that single-line idioms work in the
    multi-line searches performed by checkfile().  Must be called once
    before checking any file.
    """
    for c in checks:
        failandwarn = c[-1]
        for pats in failandwarn:
            for i, pseq in enumerate(pats):
                # fix-up regexes for multi-line searches
                p = pseq[0]
                # \s doesn't match \n
                p = re.sub(r'(?<!\\)\\s', r'[ \\t]', p)
                # [^...] doesn't match newline
                p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)

                pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
        filters = c[3]
        for i, flt in enumerate(filters):
            filters[i] = re.compile(flt[0]), flt[1]
510 511
class norepeatlogger(object):
    """Error reporter that echoes each faulty source line at most once."""

    def __init__(self):
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """Print an error report for one line of one file.

        When several messages apply to the same line, the file/line
        header and the line itself are printed only for the first one.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: optional annotation info shown in the header
        """
        msgid = fname, lineno, line
        if msgid != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = msgid
        print(" " + msg)

# shared logger instance used as checkfile()'s default reporter
_defaultlogger = norepeatlogger()
537 538
def getblame(f):
    """Return [(line, user, rev), ...] for file f via 'hg annotate -un'."""
    annotated = []
    for raw in os.popen('hg annotate -un %s' % f):
        meta, text = raw.split(':', 1)
        user, rev = meta.split()
        # drop the leading space and trailing newline around the line text
        annotated.append((text[1:-1], user, rev))
    return annotated
545 546
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage)
    :maxerr: number of error to display before aborting.
             Set to false (default) to report all errors
    :warnings: also check warning-level patterns, not only errors
    :blame: annotate each report with 'hg annotate' user@rev info
    :debug: print per-check progress information
    :lineno: report line numbers (False reports them all as falsy)

    return True if no error is found, False otherwise.
    """
    blamecache = None
    result = True

    try:
        with opentext(f) as fp:
            try:
                # 'pre' keeps the raw text for reporting; 'post' gets filtered
                pre = post = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    for name, match, magic, filters, pats in checks:
        post = pre # discard filtering result of previous check
        if debug:
            print(name, f)
        fc = 0
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                print("Skipping %s for %s it doesn't match %s" % (
                       name, match, f))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file
        for p, r in filters:
            post = re.sub(p, r, post)
        nerrs = len(pats[0]) # nerr elements are errors
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for i, pat in enumerate(pats):
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None
            # patterns past the error count are warning-level rules
            if i >= nerrs:
                msg = "warning: " + msg

            # pos/n walk the filtered text to map match offsets back to
            # line numbers; matches are found in 'post' but reported
            # from the unfiltered 'pre' lines
            pos = 0
            n = 0
            for m in p.finditer(post):
                if prelines is None:
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                l = prelines[n]

                if ignore and re.search(ignore, l, re.MULTILINE):
                    if debug:
                        print("Skipping %s for %s:%s (ignore pattern)" % (
                            name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    if not blamecache:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        if bl == l:
                            bd = '%s@%s' % (bu, br)

                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result
657 658
def main():
    """Command-line entry point: check the named files (or everything
    matched by glob('*') when no file is given) and return the exit code."""
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")
    parser.set_defaults(per_file=15, warnings=False, blame=False,
                        debug=False, lineno=True)
    options, args = parser.parse_args()

    targets = args if args else glob.glob("*")

    _preparepats()

    ret = 0
    for f in targets:
        ok = checkfile(f, maxerr=options.per_file,
                       warnings=options.warnings, blame=options.blame,
                       debug=options.debug, lineno=options.lineno)
        if not ok:
            ret = 1
    return ret

if __name__ == "__main__":
    sys.exit(main())
@@ -1,665 +1,666
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import copy
13 13 import hashlib
14 14 import os
15 15 import platform
16 16 import stat
17 17
18 18 from mercurial.i18n import _
19 19
20 20 from mercurial import (
21 21 dirstate,
22 encoding,
22 23 error,
23 24 httpconnection,
24 25 match as matchmod,
25 26 node,
26 27 pycompat,
27 28 scmutil,
28 29 util,
29 30 )
30 31
31 32 shortname = '.hglf'
32 33 shortnameslash = shortname + '/'
33 34 longname = 'largefiles'
34 35
35 36 # -- Private worker functions ------------------------------------------
36 37
def getminsize(ui, assumelfiles, opt, default=10):
    """Return the minimum size (in MB) for files to be treated as largefiles.

    An explicit command-line value ``opt`` wins; otherwise, when
    ``assumelfiles`` is set, the ``largefiles.minsize`` config value (or
    ``default``) is consulted.  Aborts when the value is missing or not
    numeric.
    """
    lfsize = opt
    if not lfsize and assumelfiles:
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            raise error.Abort(_('largefiles: size must be number (not %s)\n')
                              % lfsize)
    if lfsize is None:
        raise error.Abort(_('minimum size for largefiles must be specified'))
    return lfsize
50 51
def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # hardlinking failed (e.g. cross-device): fall back to an atomic copy
        with open(src, 'rb') as srcf, util.atomictempfile(dest) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
        os.chmod(dest, os.stat(src).st_mode)
63 64
def usercachepath(ui, hash):
    '''Return the path of the file with the given hash in the "global"
    largefiles cache.  The cache shares largefiles across repositories,
    saving both download bandwidth and storage space.'''
    return os.path.join(_usercachedir(ui), hash)
70 71
def _usercachedir(ui):
    '''Return the location of the "global" largefiles cache.'''
    # explicit configuration always wins
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return configured
    env = encoding.environ
    if pycompat.osname == 'nt':
        appdata = env.get('LOCALAPPDATA', env.get('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname)
    elif platform.system() == 'Darwin':
        homedir = env.get('HOME')
        if homedir:
            return os.path.join(homedir, 'Library', 'Caches', longname)
    elif pycompat.osname == 'posix':
        cachehome = env.get('XDG_CACHE_HOME')
        if cachehome:
            return os.path.join(cachehome, longname)
        homedir = env.get('HOME')
        if homedir:
            return os.path.join(homedir, '.cache', longname)
    else:
        raise error.Abort(_('unknown operating system: %s\n')
                          % pycompat.osname)
    # known OS, but none of the expected environment variables were set
    raise error.Abort(_('unknown %s usercache location') % longname)
96 97
def inusercache(ui, hash):
    """Return True if the largefile with the given hash is in the user
    cache."""
    return os.path.exists(usercachepath(ui, hash))
100 101
def findfile(repo, hash):
    '''Return store path of the largefile with the specified hash.
    As a side effect, the file might be linked from user cache.
    Return None if the file can't be found locally.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        # populate the store from the user cache for next time
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
115 116
class largefilesdirstate(dirstate.dirstate):
    """dirstate subclass tracking largefiles; every path is normalized to
    the slash-separated lfdirstate form before delegating to the base
    class."""
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        # largefiles are never ignored
        return False
    def write(self, tr=False):
        # (1) disable PENDING mode always
        # (lfdirstate isn't yet managed as a part of the transaction)
        # (2) avoid develwarn 'use dirstate.write with ....'
        super(largefilesdirstate, self).write(None)
138 139
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    vfs = repo.vfs
    lfstoredir = longname
    opener = scmutil.opener(vfs.join(lfstoredir))
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it.  This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not vfs.exists(vfs.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        if standins:
            vfs.makedirs(lfstoredir)

        for fstandin in standins:
            lfdirstate.normallookup(splitstandin(fstandin))
    return lfdirstate
164 165
def lfdirstatestatus(lfdirstate, repo):
    """Resolve 'unsure' entries of the lfdirstate by content hash and fold
    them into the modified/clean buckets of the returned status."""
    wctx = repo['.']
    match = matchmod.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = wctx[standin(lfile)]
        except LookupError:
            fctx = None
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            lfdirstate.normal(lfile)
    return s
181 182
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    # ignore unknown files in working directory
    return [splitstandin(f)
            for f in repo[rev].walk(matcher)
            if rev is not None or repo.dirstate[f] != '?']
193 194
def instore(repo, hash, forcelocal=False):
    '''Return true if a largefile with the given hash exists in the store'''
    return os.path.exists(storepath(repo, hash, forcelocal))
197 198
def storepath(repo, hash, forcelocal=False):
    '''Return the correct location in the repository largefiles store for a
    file with the given hash.'''
    if not forcelocal and repo.shared():
        # shared repos keep their primary store in the share source
        return repo.vfs.reljoin(repo.sharedpath, longname, hash)
    return repo.join(longname, hash)
204 205
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is
    returned.  The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos, the primary store is in the share source.  But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    primary = storepath(repo, hash, False)
    if instore(repo, hash):
        return (primary, True)
    if repo.shared() and instore(repo, hash, True):
        return (storepath(repo, hash, True), True)
    return (primary, False)
221 222
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    wvfs = repo.wvfs
    path = findfile(repo, hash)
    if path is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    with open(path, 'rb') as srcfd, wvfs(filename, 'wb') as destfd:
        gothash = copyandhash(util.filechunkiter(srcfd), destfd)
    if gothash != hash:
        # corrupted copy: report it and remove the bad file
        repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
                     % (filename, path, gothash))
        wvfs.unlink(filename)
        return False
    return True
245 246
def copytostore(repo, rev, file, uploaded=False):
    """Copy the largefile standing in for ``file`` at ``rev`` into the
    store, unless it is already there or missing from the working copy."""
    wvfs = repo.wvfs
    hash = readstandin(repo, file, rev)
    if instore(repo, hash):
        return
    if wvfs.exists(file):
        copytostoreabsolute(repo, wvfs.join(file), hash)
    else:
        repo.ui.warn(_("%s: largefile %s not available from local store\n") %
                     (file, hash))
256 257
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        if isstandin(filename) and filename in ctx.manifest():
            copytostore(repo, ctx.node(), splitstandin(filename))
265 266
def copytostoreabsolute(repo, file, hash):
    """Copy the largefile at absolute path ``file`` into the store, either
    by linking from the user cache or by copying (then linking back)."""
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
        return
    util.makedirs(os.path.dirname(storepath(repo, hash)))
    with open(file, 'rb') as srcf:
        with util.atomictempfile(storepath(repo, hash),
                                 createmode=repo.store.createmode) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
    linktousercache(repo, hash)
277 278
def linktousercache(repo, hash):
    '''Link / copy the largefile with the specified hash from the store
    to the cache.'''
    link(storepath(repo, hash), usercachepath(repo.ui, hash))
283 284
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    wvfs = repo.wvfs
    standindir = shortname

    def badfn(f, msg):
        # no warnings about missing files or directories
        return None

    if rmatcher and not rmatcher.always():
        pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
        if not pats:
            pats = [wvfs.join(standindir)]
        match = scmutil.match(repo[None], pats, badfn=badfn)
        # if pats is empty, it would incorrectly always match, so clear _always
        match._always = False
    else:
        # no patterns: relative to repo root
        match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
    return match
303 304
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn

    def composedmatchfn(f):
        # a standin matches only if the largefile it stands for matches too
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
315 316
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
327 328
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
332 333
def splitstandin(filename):
    """Return the largefile name for the given standin path, or None if
    ``filename`` is not a standin."""
    # Split on / because that's what dirstate always uses, even on Windows;
    # normalize any platform separator first since filenames may come from
    # an external source (like the command line).
    prefix, slash, lfile = util.pconvert(filename).partition('/')
    if slash and prefix == shortname:
        return lfile
    return None
342 343
def updatestandin(repo, standin):
    """Re-hash the largefile behind ``standin`` and rewrite the standin file.

    Raises error.Abort if the largefile is missing from the working
    directory.
    """
    # compute the largefile name once instead of re-splitting three times
    lfile = splitstandin(standin)
    file = repo.wjoin(lfile)
    if repo.wvfs.exists(lfile):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)
    else:
        raise error.Abort(_('%s: file not found!') % lfile)
351 352
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    return repo[node][standin(filename)].data().strip()
356 357
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    repo.wwrite(standin, hash + '\n', executable and 'x' or '')
360 361
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    # seed with no data: passing the str literal '' breaks under Python 3,
    # where hashlib only accepts bytes; sha1() is identical on Python 2
    hasher = hashlib.sha1()
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    return hasher.hexdigest()
369 370
def hashrepofile(repo, file):
    """Return the SHA-1 hex digest of ``file``, relative to the repo root."""
    return hashfile(repo.wjoin(file))
372 373
def hashfile(file):
    """Return the SHA-1 hex digest of the file's content, or '' if the
    file does not exist."""
    if not os.path.exists(file):
        return ''
    # no seed: hashlib.sha1('') breaks under Python 3 (bytes required)
    hasher = hashlib.sha1()
    with open(file, 'rb') as fd:
        for data in util.filechunkiter(fd):
            hasher.update(data)
    return hasher.hexdigest()
381 382
def getexecutable(filename):
    """Return a truthy value iff user, group and other all have the execute
    bit set on ``filename``."""
    mode = os.stat(filename).st_mode
    return ((mode & stat.S_IXUSR) and
            (mode & stat.S_IXGRP) and
            (mode & stat.S_IXOTH))
387 388
def urljoin(first, second, *arg):
    """Join URL components, ensuring exactly one slash between each pair."""
    def join(left, right):
        if not left.endswith('/'):
            left += '/'
        return left + (right[1:] if right.startswith('/') else right)

    url = join(first, second)
    for piece in arg:
        url = join(url, piece)
    return url
400 401
def hexsha1(data):
    """Return the hex-encoded sha1 sum of the content of the file-like
    object ``data``."""
    hasher = hashlib.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
408 409
def httpsendfile(ui, filename):
    """Return an httpsendfile wrapper reading ``filename`` in binary mode."""
    return httpconnection.httpsendfile(ui, filename, 'rb')
411 412
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    return util.pconvert(os.path.normpath(path))
415 416
def islfilesrepo(repo):
    '''Return true if the repo is a largefile repo.'''
    if ('largefiles' in repo.requirements and
            any(shortnameslash in f[0] for f in repo.store.datafiles())):
        return True
    # fall back: any entry in the largefiles dirstate counts
    return any(openlfdirstate(repo.ui, repo, False))
423 424
class storeprotonotcapable(Exception):
    """Raised when no store class supports any of ``storetypes``."""
    def __init__(self, storetypes):
        self.storetypes = storetypes
427 428
def getstandinsstate(repo):
    """Return [(lfile, hash-or-None), ...] for every tracked standin."""
    standins = []
    matcher = getstandinmatcher(repo)
    for fstandin in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(fstandin)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            # unreadable standin: record the largefile with no hash
            hash = None
        standins.append((lfile, hash))
    return standins
439 440
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    """Bring the lfdirstate entry for ``lfile`` in sync with the dirstate
    status of its standin."""
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        # avoid shadowing the `stat` module: name the entry explicitly
        entry = repo.dirstate._map[lfstandin]
        state, mtime = entry[0], entry[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if (normallookup or mtime < 0 or
                not repo.wvfs.exists(lfile)):
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
462 463
def markcommitted(orig, ctx, node):
    """Post-commit wrapper: sync lfdirstate entries for committed standins
    and copy the committed largefiles into the store."""
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if isstandin(f):
            synclfdirstate(repo, lfdirstate, splitstandin(f), False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
486 487
def getlfilestoupdate(oldstandins, newstandins):
    """Return the largefile names whose (name, hash) entries differ between
    the two standin lists."""
    changed = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    seen = set()
    for lfile, _hash in changed:
        # a renamed/modified file appears twice in the symmetric difference;
        # track seen names in a set instead of scanning the result list
        if lfile not in seen:
            seen.add(lfile)
            filelist.append(lfile)
    return filelist
494 495
def getlfilestoupload(repo, missing, addfunc):
    """For each revision in ``missing``, call ``addfunc(standin, hash)`` for
    every standin that revision touches."""
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revisions'), total=len(missing))
        parents = [p for p in repo[n].parents() if p != node.nullid]

        # read the changectx with largefiles status handling disabled
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # merge commit: also inspect files differing between manifests
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for mp in (mp1, mp2):
                for f in mp:
                    if f not in mc:
                        files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    repo.ui.progress(_('finding outgoing largefiles'), None)
526 527
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''
    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False,
                                      False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles and repo.wvfs.exists(standin(lfile)):
                # this handles the case where a rebase is being
                # performed and the working copy is not updated
                # yet.
                if repo.wvfs.exists(lfile):
                    updatestandin(repo, standin(lfile))
        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove).  In the case of a
        # standin removal, drop the normal file if it is unknown to dirstate.
        # Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            if repo.dirstate[fstandin] != 'r':
                if repo.dirstate[f] != 'r':
                    continue
            elif repo.dirstate[f] == '?':
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        if origmatchfn(f):
            # matched by the user's patterns: commit it unless it is a
            # plain largefile name (its standin is committed instead)
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
628 629
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        self.resuming = False # avoids updating at subsequent commits
        return updatestandinsbymatch(repo, match)
649 650
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status # forcibly WRITE OUT
    return lambda *msg, **opts: None # forcibly IGNORE
@@ -1,176 +1,176
1 1 # profiling.py - profiling functions
2 2 #
3 3 # Copyright 2016 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import contextlib
11 11 import time
12 12
13 13 from .i18n import _
14 14 from . import (
15 encoding,
15 16 error,
16 pycompat,
17 17 util,
18 18 )
19 19
@contextlib.contextmanager
def lsprofile(ui, fp):
    """Profile the enclosed block with lsprof, writing results to ``fp``."""
    format = ui.config('profiling', 'format', default='text')
    field = ui.config('profiling', 'sort', default='inlinetime')
    limit = ui.configint('profiling', 'limit', default=30)
    climit = ui.configint('profiling', 'nested', default=0)

    if format not in ['text', 'kcachegrind']:
        ui.warn(_("unrecognized profiling format '%s'"
                  " - Ignored\n") % format)
        format = 'text'

    try:
        from . import lsprof
    except ImportError:
        raise error.Abort(_(
            'lsprof not available - install from '
            'http://codespeak.net/svn/user/arigo/hack/misc/lsprof/'))
    p = lsprof.Profiler()
    p.enable(subcalls=True)
    try:
        yield
    finally:
        # always emit results, even when the profiled block raised
        p.disable()

        if format == 'kcachegrind':
            from . import lsprofcalltree
            calltree = lsprofcalltree.KCacheGrind(p)
            calltree.output(fp)
        else:
            # format == 'text'
            stats = lsprof.Stats(p.getstats())
            stats.sort(field)
            stats.pprint(limit=limit, file=fp, climit=climit)
54 54
@contextlib.contextmanager
def flameprofile(ui, fp):
    """Profile the enclosed block with flamegraph, writing to ``fp``."""
    try:
        from flamegraph import flamegraph
    except ImportError:
        raise error.Abort(_(
            'flamegraph not available - install from '
            'https://github.com/evanhempel/python-flamegraph'))
    # developer config: profiling.freq
    freq = ui.configint('profiling', 'freq', default=1000)
    filter_ = None
    collapse_recursion = True
    thread = flamegraph.ProfileThread(fp, 1.0 / freq,
                                      filter_, collapse_recursion)
    start_time = time.clock()
    try:
        thread.start()
        yield
    finally:
        thread.stop()
        thread.join()
        print('Collected %d stack frames (%d unique) in %2.2f seconds.' % (
            time.clock() - start_time, thread.num_frames(),
            thread.num_frames(unique=True)))
79 79
@contextlib.contextmanager
def statprofile(ui, fp):
    """Profile the enclosed block with the bundled statprof sampler."""
    from . import statprof

    freq = ui.configint('profiling', 'freq', default=1000)
    if freq > 0:
        # Cannot reset when profiler is already active. So silently no-op.
        if statprof.state.profile_level == 0:
            statprof.reset(freq)
    else:
        ui.warn(_("invalid sampling frequency '%s' - ignoring\n") % freq)

    statprof.start(mechanism='thread')

    try:
        yield
    finally:
        data = statprof.stop()

        profformat = ui.config('profiling', 'statformat', 'hotpath')

        formats = {
            'byline': statprof.DisplayFormats.ByLine,
            'bymethod': statprof.DisplayFormats.ByMethod,
            'hotpath': statprof.DisplayFormats.Hotpath,
            'json': statprof.DisplayFormats.Json,
        }

        if profformat in formats:
            displayformat = formats[profformat]
        else:
            ui.warn(_('unknown profiler output format: %s\n') % profformat)
            displayformat = statprof.DisplayFormats.Hotpath

        statprof.display(fp, data=data, format=displayformat)
115 115
@contextlib.contextmanager
def profile(ui):
    """Start profiling.

    Profiling is active when the context manager is active. When the context
    manager exits, profiling results will be written to the configured output.
    """
    # HGPROF environment variable overrides the configured profiler type
    profiler = encoding.environ.get('HGPROF')
    if profiler is None:
        profiler = ui.config('profiling', 'type', default='stat')
    if profiler not in ('ls', 'stat', 'flame'):
        ui.warn(_("unrecognized profiler '%s' - ignored\n") % profiler)
        profiler = 'stat'

    output = ui.config('profiling', 'output')

    if output == 'blackbox':
        # buffer the report so it can be forwarded to the blackbox log
        fp = util.stringio()
    elif output:
        path = ui.expandpath(output)
        fp = open(path, 'wb')
    else:
        fp = ui.ferr

    try:
        proffn = {'ls': lsprofile, 'flame': flameprofile}.get(profiler,
                                                              statprofile)
        with proffn(ui, fp):
            yield
    finally:
        if output:
            if output == 'blackbox':
                val = 'Profile:\n%s' % fp.getvalue()
                # ui.log treats the input as a format string,
                # so we need to escape any % signs.
                val = val.replace('%', '%%')
                ui.log('profile', val)
            fp.close()
160 160
@contextlib.contextmanager
def maybeprofile(ui):
    """Profile if enabled, else do nothing.

    This context manager can be used to optionally profile if profiling
    is enabled. Otherwise, it does nothing.

    The purpose of this context manager is to make calling code simpler:
    just use a single code path for calling into code you may want to profile
    and this function determines whether to start profiling.
    """
    if not ui.configbool('profiling', 'enabled'):
        yield
    else:
        with profile(ui):
            yield
@@ -1,293 +1,291
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19
20 20 if not ispy3:
21 21 import cPickle as pickle
22 22 import cStringIO as io
23 23 import httplib
24 24 import Queue as _queue
25 25 import SocketServer as socketserver
26 26 import urlparse
27 27 urlunquote = urlparse.unquote
28 28 import xmlrpclib
29 29 else:
30 30 import http.client as httplib
31 31 import io
32 32 import pickle
33 33 import queue as _queue
34 34 import socketserver
35 35 import urllib.parse as urlparse
36 36 urlunquote = urlparse.unquote_to_bytes
37 37 import xmlrpc.client as xmlrpclib
38 38
39 39 if ispy3:
40 40 import builtins
41 41 import functools
42 42 fsencode = os.fsencode
43 43 fsdecode = os.fsdecode
44 44 # A bytes version of os.name.
45 45 osname = os.name.encode('ascii')
46 46 ospathsep = os.pathsep.encode('ascii')
47 47 ossep = os.sep.encode('ascii')
48 48 osaltsep = os.altsep
49 osgetenv = os.getenvb
50 49 if osaltsep:
51 50 osaltsep = osaltsep.encode('ascii')
52 51 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
53 52 # returns bytes.
54 53 getcwd = os.getcwdb
55 54 sysplatform = sys.platform.encode('ascii')
56 55 sysexecutable = sys.executable
57 56 if sysexecutable:
58 57 sysexecutable = os.fsencode(sysexecutable)
59 58
60 59 # TODO: .buffer might not exist if std streams were replaced; we'll need
61 60 # a silly wrapper to make a bytes stream backed by a unicode one.
62 61 stdin = sys.stdin.buffer
63 62 stdout = sys.stdout.buffer
64 63 stderr = sys.stderr.buffer
65 64
66 65 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
67 66 # we can use os.fsencode() to get back bytes argv.
68 67 #
69 68 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
70 69 #
71 70 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
72 71 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
73 72 sysargv = list(map(os.fsencode, sys.argv))
74 73
75 74 def sysstr(s):
76 75 """Return a keyword str to be passed to Python functions such as
77 76 getattr() and str.encode()
78 77
79 78 This never raises UnicodeDecodeError. Non-ascii characters are
80 79 considered invalid and mapped to arbitrary but unique code points
81 80 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
82 81 """
83 82 if isinstance(s, builtins.str):
84 83 return s
85 84 return s.decode(u'latin-1')
86 85
87 86 def _wrapattrfunc(f):
88 87 @functools.wraps(f)
89 88 def w(object, name, *args):
90 89 return f(object, sysstr(name), *args)
91 90 return w
92 91
93 92 # these wrappers are automagically imported by hgloader
94 93 delattr = _wrapattrfunc(builtins.delattr)
95 94 getattr = _wrapattrfunc(builtins.getattr)
96 95 hasattr = _wrapattrfunc(builtins.hasattr)
97 96 setattr = _wrapattrfunc(builtins.setattr)
98 97 xrange = builtins.range
99 98
100 99 # getopt.getopt() on Python 3 deals with unicodes internally so we cannot
101 100 # pass bytes there. Passing unicodes will result in unicodes as return
102 101 # values which we need to convert again to bytes.
103 102 def getoptb(args, shortlist, namelist):
104 103 args = [a.decode('latin-1') for a in args]
105 104 shortlist = shortlist.decode('latin-1')
106 105 namelist = [a.decode('latin-1') for a in namelist]
107 106 opts, args = getopt.getopt(args, shortlist, namelist)
108 107 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
109 108 for a in opts]
110 109 args = [a.encode('latin-1') for a in args]
111 110 return opts, args
112 111
113 112 # keys of keyword arguments in Python need to be strings which are unicodes
114 113 # in Python 3. This function takes keyword arguments and converts the keys to str.
115 114 def strkwargs(dic):
116 115 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
117 116 return dic
118 117
119 118 # keys of keyword arguments need to be unicode while passing into
120 119 # a function. This function helps us to convert those keys back to bytes
121 120 # again as we need to deal with bytes.
122 121 def byteskwargs(dic):
123 122 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
124 123 return dic
125 124
126 125 # shlex.split() accepts unicodes on Python 3. This function takes bytes
127 126 # argument, convert it into unicodes, pass into shlex.split(), convert the
128 127 # returned value to bytes and return that.
129 128 # TODO: handle shlex.shlex().
130 129 def shlexsplit(s):
131 130 ret = shlex.split(s.decode('latin-1'))
132 131 return [a.encode('latin-1') for a in ret]
133 132
134 133 else:
135 134 def sysstr(s):
136 135 return s
137 136
138 137 # Partial backport from os.py in Python 3, which only accepts bytes.
139 138 # In Python 2, our paths should only ever be bytes, a unicode path
140 139 # indicates a bug.
141 140 def fsencode(filename):
142 141 if isinstance(filename, str):
143 142 return filename
144 143 else:
145 144 raise TypeError(
146 145 "expect str, not %s" % type(filename).__name__)
147 146
148 147 # In Python 2, fsdecode() has a very high chance of receiving bytes. So it's
149 148 # better not to touch the Python 2 part as it's already working fine.
150 149 def fsdecode(filename):
151 150 return filename
152 151
153 152 def getoptb(args, shortlist, namelist):
154 153 return getopt.getopt(args, shortlist, namelist)
155 154
156 155 def strkwargs(dic):
157 156 return dic
158 157
159 158 def byteskwargs(dic):
160 159 return dic
161 160
162 161 osname = os.name
163 162 ospathsep = os.pathsep
164 163 ossep = os.sep
165 164 osaltsep = os.altsep
166 165 stdin = sys.stdin
167 166 stdout = sys.stdout
168 167 stderr = sys.stderr
169 168 sysargv = sys.argv
170 169 sysplatform = sys.platform
171 170 getcwd = os.getcwd
172 osgetenv = os.getenv
173 171 sysexecutable = sys.executable
174 172 shlexsplit = shlex.split
175 173
176 174 stringio = io.StringIO
177 175 empty = _queue.Empty
178 176 queue = _queue.Queue
179 177
180 178 class _pycompatstub(object):
181 179 def __init__(self):
182 180 self._aliases = {}
183 181
184 182 def _registeraliases(self, origin, items):
185 183 """Add items that will be populated at the first access"""
186 184 items = map(sysstr, items)
187 185 self._aliases.update(
188 186 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
189 187 for item in items)
190 188
191 189 def __getattr__(self, name):
192 190 try:
193 191 origin, item = self._aliases[name]
194 192 except KeyError:
195 193 raise AttributeError(name)
196 194 self.__dict__[name] = obj = getattr(origin, item)
197 195 return obj
198 196
199 197 httpserver = _pycompatstub()
200 198 urlreq = _pycompatstub()
201 199 urlerr = _pycompatstub()
202 200 if not ispy3:
203 201 import BaseHTTPServer
204 202 import CGIHTTPServer
205 203 import SimpleHTTPServer
206 204 import urllib2
207 205 import urllib
208 206 urlreq._registeraliases(urllib, (
209 207 "addclosehook",
210 208 "addinfourl",
211 209 "ftpwrapper",
212 210 "pathname2url",
213 211 "quote",
214 212 "splitattr",
215 213 "splitpasswd",
216 214 "splitport",
217 215 "splituser",
218 216 "unquote",
219 217 "url2pathname",
220 218 "urlencode",
221 219 ))
222 220 urlreq._registeraliases(urllib2, (
223 221 "AbstractHTTPHandler",
224 222 "BaseHandler",
225 223 "build_opener",
226 224 "FileHandler",
227 225 "FTPHandler",
228 226 "HTTPBasicAuthHandler",
229 227 "HTTPDigestAuthHandler",
230 228 "HTTPHandler",
231 229 "HTTPPasswordMgrWithDefaultRealm",
232 230 "HTTPSHandler",
233 231 "install_opener",
234 232 "ProxyHandler",
235 233 "Request",
236 234 "urlopen",
237 235 ))
238 236 urlerr._registeraliases(urllib2, (
239 237 "HTTPError",
240 238 "URLError",
241 239 ))
242 240 httpserver._registeraliases(BaseHTTPServer, (
243 241 "HTTPServer",
244 242 "BaseHTTPRequestHandler",
245 243 ))
246 244 httpserver._registeraliases(SimpleHTTPServer, (
247 245 "SimpleHTTPRequestHandler",
248 246 ))
249 247 httpserver._registeraliases(CGIHTTPServer, (
250 248 "CGIHTTPRequestHandler",
251 249 ))
252 250
253 251 else:
254 252 import urllib.request
255 253 urlreq._registeraliases(urllib.request, (
256 254 "AbstractHTTPHandler",
257 255 "addclosehook",
258 256 "addinfourl",
259 257 "BaseHandler",
260 258 "build_opener",
261 259 "FileHandler",
262 260 "FTPHandler",
263 261 "ftpwrapper",
264 262 "HTTPHandler",
265 263 "HTTPSHandler",
266 264 "install_opener",
267 265 "pathname2url",
268 266 "HTTPBasicAuthHandler",
269 267 "HTTPDigestAuthHandler",
270 268 "HTTPPasswordMgrWithDefaultRealm",
271 269 "ProxyHandler",
272 270 "quote",
273 271 "Request",
274 272 "splitattr",
275 273 "splitpasswd",
276 274 "splitport",
277 275 "splituser",
278 276 "unquote",
279 277 "url2pathname",
280 278 "urlopen",
281 279 ))
282 280 import urllib.error
283 281 urlerr._registeraliases(urllib.error, (
284 282 "HTTPError",
285 283 "URLError",
286 284 ))
287 285 import http.server
288 286 httpserver._registeraliases(http.server, (
289 287 "HTTPServer",
290 288 "BaseHTTPRequestHandler",
291 289 "SimpleHTTPRequestHandler",
292 290 "CGIHTTPRequestHandler",
293 291 ))
@@ -1,486 +1,486
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import base64
13 13 import os
14 14 import socket
15 15
16 16 from .i18n import _
17 17 from . import (
18 encoding,
18 19 error,
19 20 httpconnection as httpconnectionmod,
20 21 keepalive,
21 pycompat,
22 22 sslutil,
23 23 util,
24 24 )
25 25
26 26 httplib = util.httplib
27 27 stringio = util.stringio
28 28 urlerr = util.urlerr
29 29 urlreq = util.urlreq
30 30
31 31 class passwordmgr(object):
32 32 def __init__(self, ui, passwddb):
33 33 self.ui = ui
34 34 self.passwddb = passwddb
35 35
36 36 def add_password(self, realm, uri, user, passwd):
37 37 return self.passwddb.add_password(realm, uri, user, passwd)
38 38
39 39 def find_user_password(self, realm, authuri):
40 40 authinfo = self.passwddb.find_user_password(realm, authuri)
41 41 user, passwd = authinfo
42 42 if user and passwd:
43 43 self._writedebug(user, passwd)
44 44 return (user, passwd)
45 45
46 46 if not user or not passwd:
47 47 res = httpconnectionmod.readauthforuri(self.ui, authuri, user)
48 48 if res:
49 49 group, auth = res
50 50 user, passwd = auth.get('username'), auth.get('password')
51 51 self.ui.debug("using auth.%s.* for authentication\n" % group)
52 52 if not user or not passwd:
53 53 u = util.url(authuri)
54 54 u.query = None
55 55 if not self.ui.interactive():
56 56 raise error.Abort(_('http authorization required for %s') %
57 57 util.hidepassword(str(u)))
58 58
59 59 self.ui.write(_("http authorization required for %s\n") %
60 60 util.hidepassword(str(u)))
61 61 self.ui.write(_("realm: %s\n") % realm)
62 62 if user:
63 63 self.ui.write(_("user: %s\n") % user)
64 64 else:
65 65 user = self.ui.prompt(_("user:"), default=None)
66 66
67 67 if not passwd:
68 68 passwd = self.ui.getpass()
69 69
70 70 self.passwddb.add_password(realm, authuri, user, passwd)
71 71 self._writedebug(user, passwd)
72 72 return (user, passwd)
73 73
74 74 def _writedebug(self, user, passwd):
75 75 msg = _('http auth: user %s, password %s\n')
76 76 self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))
77 77
78 78 def find_stored_password(self, authuri):
79 79 return self.passwddb.find_user_password(None, authuri)
80 80
81 81 class proxyhandler(urlreq.proxyhandler):
82 82 def __init__(self, ui):
83 83 proxyurl = (ui.config("http_proxy", "host") or
84 pycompat.osgetenv('http_proxy'))
84 encoding.environ.get('http_proxy'))
85 85 # XXX proxyauthinfo = None
86 86
87 87 if proxyurl:
88 88 # proxy can be proper url or host[:port]
89 89 if not (proxyurl.startswith('http:') or
90 90 proxyurl.startswith('https:')):
91 91 proxyurl = 'http://' + proxyurl + '/'
92 92 proxy = util.url(proxyurl)
93 93 if not proxy.user:
94 94 proxy.user = ui.config("http_proxy", "user")
95 95 proxy.passwd = ui.config("http_proxy", "passwd")
96 96
97 97 # see if we should use a proxy for this url
98 98 no_list = ["localhost", "127.0.0.1"]
99 99 no_list.extend([p.lower() for
100 100 p in ui.configlist("http_proxy", "no")])
101 101 no_list.extend([p.strip().lower() for
102 p in pycompat.osgetenv("no_proxy", '').split(',')
102 p in encoding.environ.get("no_proxy", '').split(',')
103 103 if p.strip()])
104 104 # "http_proxy.always" config is for running tests on localhost
105 105 if ui.configbool("http_proxy", "always"):
106 106 self.no_list = []
107 107 else:
108 108 self.no_list = no_list
109 109
110 110 proxyurl = str(proxy)
111 111 proxies = {'http': proxyurl, 'https': proxyurl}
112 112 ui.debug('proxying through http://%s:%s\n' %
113 113 (proxy.host, proxy.port))
114 114 else:
115 115 proxies = {}
116 116
117 117 urlreq.proxyhandler.__init__(self, proxies)
118 118 self.ui = ui
119 119
120 120 def proxy_open(self, req, proxy, type_):
121 121 host = req.get_host().split(':')[0]
122 122 for e in self.no_list:
123 123 if host == e:
124 124 return None
125 125 if e.startswith('*.') and host.endswith(e[2:]):
126 126 return None
127 127 if e.startswith('.') and host.endswith(e[1:]):
128 128 return None
129 129
130 130 return urlreq.proxyhandler.proxy_open(self, req, proxy, type_)
131 131
132 132 def _gen_sendfile(orgsend):
133 133 def _sendfile(self, data):
134 134 # send a file
135 135 if isinstance(data, httpconnectionmod.httpsendfile):
136 136 # if auth required, some data sent twice, so rewind here
137 137 data.seek(0)
138 138 for chunk in util.filechunkiter(data):
139 139 orgsend(self, chunk)
140 140 else:
141 141 orgsend(self, data)
142 142 return _sendfile
143 143
144 144 has_https = util.safehasattr(urlreq, 'httpshandler')
145 145
146 146 class httpconnection(keepalive.HTTPConnection):
147 147 # must be able to send big bundle as stream.
148 148 send = _gen_sendfile(keepalive.HTTPConnection.send)
149 149
150 150 def getresponse(self):
151 151 proxyres = getattr(self, 'proxyres', None)
152 152 if proxyres:
153 153 if proxyres.will_close:
154 154 self.close()
155 155 self.proxyres = None
156 156 return proxyres
157 157 return keepalive.HTTPConnection.getresponse(self)
158 158
159 159 # general transaction handler to support different ways to handle
160 160 # HTTPS proxying before and after Python 2.6.3.
161 161 def _generic_start_transaction(handler, h, req):
162 162 tunnel_host = getattr(req, '_tunnel_host', None)
163 163 if tunnel_host:
164 164 if tunnel_host[:7] not in ['http://', 'https:/']:
165 165 tunnel_host = 'https://' + tunnel_host
166 166 new_tunnel = True
167 167 else:
168 168 tunnel_host = req.get_selector()
169 169 new_tunnel = False
170 170
171 171 if new_tunnel or tunnel_host == req.get_full_url(): # has proxy
172 172 u = util.url(tunnel_host)
173 173 if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS
174 174 h.realhostport = ':'.join([u.host, (u.port or '443')])
175 175 h.headers = req.headers.copy()
176 176 h.headers.update(handler.parent.addheaders)
177 177 return
178 178
179 179 h.realhostport = None
180 180 h.headers = None
181 181
182 182 def _generic_proxytunnel(self):
183 183 proxyheaders = dict(
184 184 [(x, self.headers[x]) for x in self.headers
185 185 if x.lower().startswith('proxy-')])
186 186 self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
187 187 for header in proxyheaders.iteritems():
188 188 self.send('%s: %s\r\n' % header)
189 189 self.send('\r\n')
190 190
191 191 # majority of the following code is duplicated from
192 192 # httplib.HTTPConnection as there are no adequate places to
193 193 # override functions to provide the needed functionality
194 194 res = self.response_class(self.sock,
195 195 strict=self.strict,
196 196 method=self._method)
197 197
198 198 while True:
199 199 version, status, reason = res._read_status()
200 200 if status != httplib.CONTINUE:
201 201 break
202 202 # skip lines that are all whitespace
203 203 list(iter(lambda: res.fp.readline().strip(), ''))
204 204 res.status = status
205 205 res.reason = reason.strip()
206 206
207 207 if res.status == 200:
208 208 # skip lines until we find a blank line
209 209 list(iter(res.fp.readline, '\r\n'))
210 210 return True
211 211
212 212 if version == 'HTTP/1.0':
213 213 res.version = 10
214 214 elif version.startswith('HTTP/1.'):
215 215 res.version = 11
216 216 elif version == 'HTTP/0.9':
217 217 res.version = 9
218 218 else:
219 219 raise httplib.UnknownProtocol(version)
220 220
221 221 if res.version == 9:
222 222 res.length = None
223 223 res.chunked = 0
224 224 res.will_close = 1
225 225 res.msg = httplib.HTTPMessage(stringio())
226 226 return False
227 227
228 228 res.msg = httplib.HTTPMessage(res.fp)
229 229 res.msg.fp = None
230 230
231 231 # are we using the chunked-style of transfer encoding?
232 232 trenc = res.msg.getheader('transfer-encoding')
233 233 if trenc and trenc.lower() == "chunked":
234 234 res.chunked = 1
235 235 res.chunk_left = None
236 236 else:
237 237 res.chunked = 0
238 238
239 239 # will the connection close at the end of the response?
240 240 res.will_close = res._check_close()
241 241
242 242 # do we have a Content-Length?
243 243 # NOTE: RFC 2616, section 4.4, #3 says we ignore this if
244 244 # transfer-encoding is "chunked"
245 245 length = res.msg.getheader('content-length')
246 246 if length and not res.chunked:
247 247 try:
248 248 res.length = int(length)
249 249 except ValueError:
250 250 res.length = None
251 251 else:
252 252 if res.length < 0: # ignore nonsensical negative lengths
253 253 res.length = None
254 254 else:
255 255 res.length = None
256 256
257 257 # does the body have a fixed length? (of zero)
258 258 if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
259 259 100 <= status < 200 or # 1xx codes
260 260 res._method == 'HEAD'):
261 261 res.length = 0
262 262
263 263 # if the connection remains open, and we aren't using chunked, and
264 264 # a content-length was not provided, then assume that the connection
265 265 # WILL close.
266 266 if (not res.will_close and
267 267 not res.chunked and
268 268 res.length is None):
269 269 res.will_close = 1
270 270
271 271 self.proxyres = res
272 272
273 273 return False
274 274
275 275 class httphandler(keepalive.HTTPHandler):
276 276 def http_open(self, req):
277 277 return self.do_open(httpconnection, req)
278 278
279 279 def _start_transaction(self, h, req):
280 280 _generic_start_transaction(self, h, req)
281 281 return keepalive.HTTPHandler._start_transaction(self, h, req)
282 282
283 283 if has_https:
284 284 class httpsconnection(httplib.HTTPConnection):
285 285 response_class = keepalive.HTTPResponse
286 286 default_port = httplib.HTTPS_PORT
287 287 # must be able to send big bundle as stream.
288 288 send = _gen_sendfile(keepalive.safesend)
289 289 getresponse = keepalive.wrapgetresponse(httplib.HTTPConnection)
290 290
291 291 def __init__(self, host, port=None, key_file=None, cert_file=None,
292 292 *args, **kwargs):
293 293 httplib.HTTPConnection.__init__(self, host, port, *args, **kwargs)
294 294 self.key_file = key_file
295 295 self.cert_file = cert_file
296 296
297 297 def connect(self):
298 298 self.sock = socket.create_connection((self.host, self.port))
299 299
300 300 host = self.host
301 301 if self.realhostport: # use CONNECT proxy
302 302 _generic_proxytunnel(self)
303 303 host = self.realhostport.rsplit(':', 1)[0]
304 304 self.sock = sslutil.wrapsocket(
305 305 self.sock, self.key_file, self.cert_file, ui=self.ui,
306 306 serverhostname=host)
307 307 sslutil.validatesocket(self.sock)
308 308
309 309 class httpshandler(keepalive.KeepAliveHandler, urlreq.httpshandler):
310 310 def __init__(self, ui):
311 311 keepalive.KeepAliveHandler.__init__(self)
312 312 urlreq.httpshandler.__init__(self)
313 313 self.ui = ui
314 314 self.pwmgr = passwordmgr(self.ui,
315 315 self.ui.httppasswordmgrdb)
316 316
317 317 def _start_transaction(self, h, req):
318 318 _generic_start_transaction(self, h, req)
319 319 return keepalive.KeepAliveHandler._start_transaction(self, h, req)
320 320
321 321 def https_open(self, req):
322 322 # req.get_full_url() does not contain credentials and we may
323 323 # need them to match the certificates.
324 324 url = req.get_full_url()
325 325 user, password = self.pwmgr.find_stored_password(url)
326 326 res = httpconnectionmod.readauthforuri(self.ui, url, user)
327 327 if res:
328 328 group, auth = res
329 329 self.auth = auth
330 330 self.ui.debug("using auth.%s.* for authentication\n" % group)
331 331 else:
332 332 self.auth = None
333 333 return self.do_open(self._makeconnection, req)
334 334
335 335 def _makeconnection(self, host, port=None, *args, **kwargs):
336 336 keyfile = None
337 337 certfile = None
338 338
339 339 if len(args) >= 1: # key_file
340 340 keyfile = args[0]
341 341 if len(args) >= 2: # cert_file
342 342 certfile = args[1]
343 343 args = args[2:]
344 344
345 345 # if the user has specified different key/cert files in
346 346 # hgrc, we prefer these
347 347 if self.auth and 'key' in self.auth and 'cert' in self.auth:
348 348 keyfile = self.auth['key']
349 349 certfile = self.auth['cert']
350 350
351 351 conn = httpsconnection(host, port, keyfile, certfile, *args,
352 352 **kwargs)
353 353 conn.ui = self.ui
354 354 return conn
355 355
356 356 class httpdigestauthhandler(urlreq.httpdigestauthhandler):
357 357 def __init__(self, *args, **kwargs):
358 358 urlreq.httpdigestauthhandler.__init__(self, *args, **kwargs)
359 359 self.retried_req = None
360 360
361 361 def reset_retry_count(self):
362 362 # Python 2.6.5 will call this on 401 or 407 errors and thus loop
363 363 # forever. We disable reset_retry_count completely and reset in
364 364 # http_error_auth_reqed instead.
365 365 pass
366 366
367 367 def http_error_auth_reqed(self, auth_header, host, req, headers):
368 368 # Reset the retry counter once for each request.
369 369 if req is not self.retried_req:
370 370 self.retried_req = req
371 371 self.retried = 0
372 372 return urlreq.httpdigestauthhandler.http_error_auth_reqed(
373 373 self, auth_header, host, req, headers)
374 374
375 375 class httpbasicauthhandler(urlreq.httpbasicauthhandler):
376 376 def __init__(self, *args, **kwargs):
377 377 self.auth = None
378 378 urlreq.httpbasicauthhandler.__init__(self, *args, **kwargs)
379 379 self.retried_req = None
380 380
381 381 def http_request(self, request):
382 382 if self.auth:
383 383 request.add_unredirected_header(self.auth_header, self.auth)
384 384
385 385 return request
386 386
387 387 def https_request(self, request):
388 388 if self.auth:
389 389 request.add_unredirected_header(self.auth_header, self.auth)
390 390
391 391 return request
392 392
393 393 def reset_retry_count(self):
394 394 # Python 2.6.5 will call this on 401 or 407 errors and thus loop
395 395 # forever. We disable reset_retry_count completely and reset in
396 396 # http_error_auth_reqed instead.
397 397 pass
398 398
399 399 def http_error_auth_reqed(self, auth_header, host, req, headers):
400 400 # Reset the retry counter once for each request.
401 401 if req is not self.retried_req:
402 402 self.retried_req = req
403 403 self.retried = 0
404 404 return urlreq.httpbasicauthhandler.http_error_auth_reqed(
405 405 self, auth_header, host, req, headers)
406 406
407 407 def retry_http_basic_auth(self, host, req, realm):
408 408 user, pw = self.passwd.find_user_password(realm, req.get_full_url())
409 409 if pw is not None:
410 410 raw = "%s:%s" % (user, pw)
411 411 auth = 'Basic %s' % base64.b64encode(raw).strip()
412 412 if req.get_header(self.auth_header, None) == auth:
413 413 return None
414 414 self.auth = auth
415 415 req.add_unredirected_header(self.auth_header, auth)
416 416 return self.parent.open(req)
417 417 else:
418 418 return None
419 419
420 420 handlerfuncs = []
421 421
422 422 def opener(ui, authinfo=None):
423 423 '''
424 424 construct an opener suitable for urllib2
425 425 authinfo will be added to the password manager
426 426 '''
427 427 # experimental config: ui.usehttp2
428 428 if ui.configbool('ui', 'usehttp2', False):
429 429 handlers = [
430 430 httpconnectionmod.http2handler(
431 431 ui,
432 432 passwordmgr(ui, ui.httppasswordmgrdb))
433 433 ]
434 434 else:
435 435 handlers = [httphandler()]
436 436 if has_https:
437 437 handlers.append(httpshandler(ui))
438 438
439 439 handlers.append(proxyhandler(ui))
440 440
441 441 passmgr = passwordmgr(ui, ui.httppasswordmgrdb)
442 442 if authinfo is not None:
443 443 realm, uris, user, passwd = authinfo
444 444 saveduser, savedpass = passmgr.find_stored_password(uris[0])
445 445 if user != saveduser or passwd:
446 446 passmgr.add_password(realm, uris, user, passwd)
447 447 ui.debug('http auth: user %s, password %s\n' %
448 448 (user, passwd and '*' * len(passwd) or 'not set'))
449 449
450 450 handlers.extend((httpbasicauthhandler(passmgr),
451 451 httpdigestauthhandler(passmgr)))
452 452 handlers.extend([h(ui, passmgr) for h in handlerfuncs])
453 453 opener = urlreq.buildopener(*handlers)
454 454
455 455 # The user agent should *NOT* be used by servers for e.g.
456 456 # protocol detection or feature negotiation: there are other
457 457 # facilities for that.
458 458 #
459 459 # "mercurial/proto-1.0" was the original user agent string and
460 460 # exists for backwards compatibility reasons.
461 461 #
462 462 # The "(Mercurial %s)" string contains the distribution
463 463 # name and version. Other client implementations should choose their
464 464 # own distribution name. Since servers should not be using the user
465 465 # agent string for anything, clients should be able to define whatever
466 466 # user agent they deem appropriate.
467 467 agent = 'mercurial/proto-1.0 (Mercurial %s)' % util.version()
468 468 opener.addheaders = [('User-agent', agent)]
469 469
470 470 # This header should only be needed by wire protocol requests. But it has
471 471 # been sent on all requests since forever. We keep sending it for backwards
472 472 # compatibility reasons. Modern versions of the wire protocol use
473 473 # X-HgProto-<N> for advertising client support.
474 474 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
475 475 return opener
476 476
477 477 def open(ui, url_, data=None):
478 478 u = util.url(url_)
479 479 if u.scheme:
480 480 u.scheme = u.scheme.lower()
481 481 url_, authinfo = u.authinfo()
482 482 else:
483 483 path = util.normpath(os.path.abspath(url_))
484 484 url_ = 'file://' + urlreq.pathname2url(path)
485 485 authinfo = None
486 486 return opener(ui, authinfo).open(url_, data)
General Comments 0
You need to be logged in to leave comments. Login now