##// END OF EJS Templates
py3: stop normalizing .encode()/.decode() arguments to unicode...
Gregory Szorc -
r43361:127cc1f7 default
parent child Browse files
Show More
@@ -1,670 +1,670
1 # testparseutil.py - utilities to parse test script for check tools
1 # testparseutil.py - utilities to parse test script for check tools
2 #
2 #
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import abc
10 import abc
11 import re
11 import re
12 import sys
12 import sys
13
13
14 ####################
14 ####################
15 # for Python3 compatibility (almost comes from mercurial/pycompat.py)
15 # for Python3 compatibility (almost comes from mercurial/pycompat.py)
16
16
# True when running under a Python 3.x interpreter
ispy3 = sys.version_info.major >= 3
18
18
19
19
def identity(a):
    """Return *a* unchanged (no-op conversion used on Python 2)."""
    return a
22
22
23
23
24 def _rapply(f, xs):
24 def _rapply(f, xs):
25 if xs is None:
25 if xs is None:
26 # assume None means non-value of optional data
26 # assume None means non-value of optional data
27 return xs
27 return xs
28 if isinstance(xs, (list, set, tuple)):
28 if isinstance(xs, (list, set, tuple)):
29 return type(xs)(_rapply(f, x) for x in xs)
29 return type(xs)(_rapply(f, x) for x in xs)
30 if isinstance(xs, dict):
30 if isinstance(xs, dict):
31 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
31 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
32 return f(xs)
32 return f(xs)
33
33
34
34
def rapply(f, xs):
    """Recursively apply ``f`` via :func:`_rapply`.

    When ``f`` is :func:`identity`, return ``xs`` as-is (fast path,
    mainly useful on Python 2 where conversion is a no-op).
    """
    if f is not identity:
        return _rapply(f, xs)
    # fast path mainly for py2
    return xs
40
40
41
41
if ispy3:
    import builtins

    def bytestr(s):
        # tiny version of pycompat.bytestr
        return s.encode('latin1')

    def sysstr(s):
        # convert bytes to the native str type; str passes through
        if not isinstance(s, builtins.str):
            return s.decode('latin-1')
        return s

    def opentext(f):
        # open in text mode (py3 decodes lines on read)
        return open(f, 'r')


else:
    # on py2, native str already is bytes, so these are no-ops
    bytestr = str
    sysstr = identity

    opentext = open
63
63
64
64
def b2s(x):
    """Convert BYTES elements in ``x`` to SYSSTR, recursively."""
    return rapply(sysstr, x)
68
68
69
69
def writeout(data):
    # emit "data" (in BYTES) on standard output
    stream = sys.stdout
    stream.write(data)
73
73
74
74
def writeerr(data):
    # emit "data" (in BYTES) on standard error
    stream = sys.stderr
    stream.write(data)
78
78
79
79
80 ####################
80 ####################
81
81
82
82
class embeddedmatcher(object):
    """Base class to detect embedded code fragments in *.t test script
    """

    # py2-style abstract-class declaration; NOTE(review): ignored by
    # py3, where this class is therefore not actually abstract --
    # presumably acceptable for a py2/py3 dual-support check tool
    __metaclass__ = abc.ABCMeta

    def __init__(self, desc):
        # human-readable description, used in error messages
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
140
140
141
141
def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is common parsing logic, which examines specified matchers on
    given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher('ambiguous #1',
    ...                       lambda l: l.startswith(' $ cat '))
    >>> ambig2 = ambigmatcher('ambiguous #2',
    ...                       lambda l: l.endswith('<< EOF\\n'))
    >>> lines = [' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    matcher = None
    ctx = filename = code = startline = None  # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith('\n'):
            line += '\n'  # to normalize EOF line
        if matcher:  # now, inside embedded code
            if matcher.endsat(ctx, line):
                codeatend = matcher.codeatend(ctx, line)
                if codeatend is not None:
                    code.append(codeatend)
                if not matcher.ignores(ctx):
                    yield (filename, startline, lineno, ''.join(code))
                matcher = None
                # DO NOT "continue", because line might start next fragment
            elif not matcher.isinside(ctx, line):
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(
                    '%s:%d: unexpected line for "%s"'
                    % (basefile, lineno, matcher.desc)
                )
                # stop extracting embedded code by current 'matcher',
                # because appearance of unexpected line might mean
                # that expected end-of-embedded-code line might never
                # appear
                matcher = None
                # DO NOT "continue", because line might start next fragment
            else:
                code.append(matcher.codeinside(ctx, line))
                continue

        # examine whether current line starts embedded code or not
        assert not matcher

        candidates = [(m, m.startsat(line)) for m in matchers]
        matched = [(m, c) for (m, c) in candidates if c]
        if matched:
            if len(matched) > 1:
                # this is an error of matchers, maybe
                errors.append(
                    '%s:%d: ambiguous line for %s'
                    % (
                        basefile,
                        lineno,
                        ', '.join(['"%s"' % m.desc for m, c in matched]),
                    )
                )
                # omit extracting embedded code, because choosing
                # arbitrary matcher from matched ones might fail to
                # detect the end of embedded code as expected.
                continue
            matcher, ctx = matched[0]
            filename = matcher.filename(ctx)
            code = []
            codeatstart = matcher.codeatstart(ctx, line)
            if codeatstart is not None:
                code.append(codeatstart)
                startline = lineno
            else:
                startline = lineno + 1

    if matcher:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if matcher.endsat(ctx, '\n'):
            codeatend = matcher.codeatend(ctx, '\n')
            if codeatend is not None:
                code.append(codeatend)
            if not matcher.ignores(ctx):
                yield (filename, startline, lineno + 1, ''.join(code))
        else:
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(
                '%s:%d: unexpected end of file for "%s"'
                % (basefile, lineno, matcher.desc)
            )
264
264
265
265
266 # heredoc limit mark to ignore embedded code at check-code.py or so
266 # heredoc limit mark to ignore embedded code at check-code.py or so
# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = 'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group (quote, if any, captured as 'lquote')
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
276
276
277
277
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
    >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')
    >>> start = ' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # build the pattern to match against cases below (and ">>"
        # variants), and to return a target filename string as 'name'
        # group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        namepat = (
            r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
        )
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(r' \$ \s*cat' + namepat + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(r' \$ \s*cat' + heredoclimitpat + namepat),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for pattern in self._fileres:
            matched = pattern.match(line)
            if matched:
                return (
                    matched.group('name'),
                    ' > %s\n' % matched.group('limit'),
                )

    def endsat(self, ctx, line):
        return ctx[1] == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx[1] == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix
369
369
370
370
371 ####
371 ####
372 # for embedded python script
372 # for embedded python script
373
373
374
374
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = ' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = ' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = ' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = ' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = '\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """

    _prefix = ' >>> '
    _prefixre = re.compile(r' (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(r' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__("doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # ended when the line is neither code nor expected output
        return not self._prefixre.match(line) and not self._outputre.match(
            line
        )

    def isinside(self, ctx, line):
        return True  # always true, if not yet ended

    def ignores(self, ctx):
        return False  # should be checked always

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        if self._prefixre.match(line):
            return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '
        return '\n'  # an expected output line is treated as an empty line
470
470
471
471
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
    >>> start = ' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    _startre = re.compile(
        r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
    )

    def __init__(self):
        super(pyheredocmatcher, self).__init__("heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        matched = self._startre.match(line)
        if matched:
            return ' > %s\n' % matched.group('limit')

    def endsat(self, ctx, line):
        return ctx == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix
544
544
545
545
# matchers used to detect embedded python code; the heredoc matcher
# uses '[^<]+' instead of '\S+' so that target paths containing
# whitespace still match
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
]
553
553
554
554
555 def pyembedded(basefile, lines, errors):
555 def pyembedded(basefile, lines, errors):
556 return embedded(basefile, lines, errors, _pymatchers)
556 return embedded(basefile, lines, errors, _pymatchers)
557
557
558
558
559 ####
559 ####
560 # for embedded shell script
560 # for embedded shell script
561
561
562 _shmatchers = [
562 _shmatchers = [
563 # use '[^<]+' instead of '\S+', in order to match against
563 # use '[^<]+' instead of '\S+', in order to match against
564 # paths including whitespaces
564 # paths including whitespaces
565 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
565 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
566 ]
566 ]
567
567
568
568
569 def shembedded(basefile, lines, errors):
569 def shembedded(basefile, lines, errors):
570 return embedded(basefile, lines, errors, _shmatchers)
570 return embedded(basefile, lines, errors, _shmatchers)
571
571
572
572
573 ####
573 ####
574 # for embedded hgrc configuration
574 # for embedded hgrc configuration
575
575
576 _hgrcmatchers = [
576 _hgrcmatchers = [
577 # use '[^<]+' instead of '\S+', in order to match against
577 # use '[^<]+' instead of '\S+', in order to match against
578 # paths including whitespaces
578 # paths including whitespaces
579 fileheredocmatcher(
579 fileheredocmatcher(
580 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
580 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
581 ),
581 ),
582 ]
582 ]
583
583
584
584
585 def hgrcembedded(basefile, lines, errors):
585 def hgrcembedded(basefile, lines, errors):
586 return embedded(basefile, lines, errors, _hgrcmatchers)
586 return embedded(basefile, lines, errors, _hgrcmatchers)
587
587
588
588
589 ####
589 ####
590
590
591 if __name__ == "__main__":
591 if __name__ == "__main__":
592 import optparse
592 import optparse
593 import sys
593 import sys
594
594
595 def showembedded(basefile, lines, embeddedfunc, opts):
595 def showembedded(basefile, lines, embeddedfunc, opts):
596 errors = []
596 errors = []
597 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
597 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
598 if not name:
598 if not name:
599 name = '<anonymous>'
599 name = '<anonymous>'
600 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
600 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
601 if opts.verbose and code:
601 if opts.verbose and code:
602 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
602 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
603 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
603 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
604 for e in errors:
604 for e in errors:
605 writeerr("%s\n" % e)
605 writeerr("%s\n" % e)
606 return len(errors)
606 return len(errors)
607
607
608 def applyembedded(args, embeddedfunc, opts):
608 def applyembedded(args, embeddedfunc, opts):
609 ret = 0
609 ret = 0
610 if args:
610 if args:
611 for f in args:
611 for f in args:
612 with opentext(f) as fp:
612 with opentext(f) as fp:
613 if showembedded(f, fp, embeddedfunc, opts):
613 if showembedded(f, fp, embeddedfunc, opts):
614 ret = 1
614 ret = 1
615 else:
615 else:
616 lines = [l for l in sys.stdin.readlines()]
616 lines = [l for l in sys.stdin.readlines()]
617 if showembedded('<stdin>', lines, embeddedfunc, opts):
617 if showembedded('<stdin>', lines, embeddedfunc, opts):
618 ret = 1
618 ret = 1
619 return ret
619 return ret
620
620
621 commands = {}
621 commands = {}
622
622
623 def command(name, desc):
623 def command(name, desc):
624 def wrap(func):
624 def wrap(func):
625 commands[name] = (desc, func)
625 commands[name] = (desc, func)
626
626
627 return wrap
627 return wrap
628
628
629 @command("pyembedded", "detect embedded python script")
629 @command("pyembedded", "detect embedded python script")
630 def pyembeddedcmd(args, opts):
630 def pyembeddedcmd(args, opts):
631 return applyembedded(args, pyembedded, opts)
631 return applyembedded(args, pyembedded, opts)
632
632
633 @command("shembedded", "detect embedded shell script")
633 @command("shembedded", "detect embedded shell script")
634 def shembeddedcmd(args, opts):
634 def shembeddedcmd(args, opts):
635 return applyembedded(args, shembedded, opts)
635 return applyembedded(args, shembedded, opts)
636
636
637 @command("hgrcembedded", "detect embedded hgrc configuration")
637 @command("hgrcembedded", "detect embedded hgrc configuration")
638 def hgrcembeddedcmd(args, opts):
638 def hgrcembeddedcmd(args, opts):
639 return applyembedded(args, hgrcembedded, opts)
639 return applyembedded(args, hgrcembedded, opts)
640
640
641 availablecommands = "\n".join(
641 availablecommands = "\n".join(
642 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
642 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
643 )
643 )
644
644
645 parser = optparse.OptionParser(
645 parser = optparse.OptionParser(
646 """%prog COMMAND [file ...]
646 """%prog COMMAND [file ...]
647
647
648 Pick up embedded code fragments from given file(s) or stdin, and list
648 Pick up embedded code fragments from given file(s) or stdin, and list
649 up start/end lines of them in standard compiler format
649 up start/end lines of them in standard compiler format
650 ("FILENAME:LINENO:").
650 ("FILENAME:LINENO:").
651
651
652 Available commands are:
652 Available commands are:
653 """
653 """
654 + availablecommands
654 + availablecommands
655 + """
655 + """
656 """
656 """
657 )
657 )
658 parser.add_option(
658 parser.add_option(
659 "-v",
659 "-v",
660 "--verbose",
660 "--verbose",
661 help="enable additional output (e.g. actual code)",
661 help="enable additional output (e.g. actual code)",
662 action="store_true",
662 action="store_true",
663 )
663 )
664 (opts, args) = parser.parse_args()
664 (opts, args) = parser.parse_args()
665
665
666 if not args or args[0] not in commands:
666 if not args or args[0] not in commands:
667 parser.print_help()
667 parser.print_help()
668 sys.exit(255)
668 sys.exit(255)
669
669
670 sys.exit(commands[args[0]][1](args[1:], opts))
670 sys.exit(commands[args[0]][1](args[1:], opts))
@@ -1,746 +1,746
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import contextlib
10 import contextlib
11 import errno
11 import errno
12 import hashlib
12 import hashlib
13 import json
13 import json
14 import os
14 import os
15 import re
15 import re
16 import socket
16 import socket
17
17
18 from mercurial.i18n import _
18 from mercurial.i18n import _
19 from mercurial.pycompat import getattr
19 from mercurial.pycompat import getattr
20
20
21 from mercurial import (
21 from mercurial import (
22 encoding,
22 encoding,
23 error,
23 error,
24 node,
24 node,
25 pathutil,
25 pathutil,
26 pycompat,
26 pycompat,
27 url as urlmod,
27 url as urlmod,
28 util,
28 util,
29 vfs as vfsmod,
29 vfs as vfsmod,
30 worker,
30 worker,
31 )
31 )
32
32
33 from mercurial.utils import stringutil
33 from mercurial.utils import stringutil
34
34
35 from ..largefiles import lfutil
35 from ..largefiles import lfutil
36
36
37 # 64 bytes for SHA256
37 # 64 bytes for SHA256
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39
39
40
40
41 class lfsvfs(vfsmod.vfs):
41 class lfsvfs(vfsmod.vfs):
42 def join(self, path):
42 def join(self, path):
43 """split the path at first two characters, like: XX/XXXXX..."""
43 """split the path at first two characters, like: XX/XXXXX..."""
44 if not _lfsre.match(path):
44 if not _lfsre.match(path):
45 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
45 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
46 return super(lfsvfs, self).join(path[0:2], path[2:])
46 return super(lfsvfs, self).join(path[0:2], path[2:])
47
47
48 def walk(self, path=None, onerror=None):
48 def walk(self, path=None, onerror=None):
49 """Yield (dirpath, [], oids) tuple for blobs under path
49 """Yield (dirpath, [], oids) tuple for blobs under path
50
50
51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 Oids only exist in the root of this vfs, so dirpath is always ''.
52 """
52 """
53 root = os.path.normpath(self.base)
53 root = os.path.normpath(self.base)
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
55 # because len(dirpath) < prefixlen.
55 # because len(dirpath) < prefixlen.
56 prefixlen = len(pathutil.normasprefix(root))
56 prefixlen = len(pathutil.normasprefix(root))
57 oids = []
57 oids = []
58
58
59 for dirpath, dirs, files in os.walk(
59 for dirpath, dirs, files in os.walk(
60 self.reljoin(self.base, path or b''), onerror=onerror
60 self.reljoin(self.base, path or b''), onerror=onerror
61 ):
61 ):
62 dirpath = dirpath[prefixlen:]
62 dirpath = dirpath[prefixlen:]
63
63
64 # Silently skip unexpected files and directories
64 # Silently skip unexpected files and directories
65 if len(dirpath) == 2:
65 if len(dirpath) == 2:
66 oids.extend(
66 oids.extend(
67 [dirpath + f for f in files if _lfsre.match(dirpath + f)]
67 [dirpath + f for f in files if _lfsre.match(dirpath + f)]
68 )
68 )
69
69
70 yield (b'', [], oids)
70 yield (b'', [], oids)
71
71
72
72
73 class nullvfs(lfsvfs):
73 class nullvfs(lfsvfs):
74 def __init__(self):
74 def __init__(self):
75 pass
75 pass
76
76
77 def exists(self, oid):
77 def exists(self, oid):
78 return False
78 return False
79
79
80 def read(self, oid):
80 def read(self, oid):
81 # store.read() calls into here if the blob doesn't exist in its
81 # store.read() calls into here if the blob doesn't exist in its
82 # self.vfs. Raise the same error as a normal vfs when asked to read a
82 # self.vfs. Raise the same error as a normal vfs when asked to read a
83 # file that doesn't exist. The only difference is the full file path
83 # file that doesn't exist. The only difference is the full file path
84 # isn't available in the error.
84 # isn't available in the error.
85 raise IOError(
85 raise IOError(
86 errno.ENOENT,
86 errno.ENOENT,
87 pycompat.sysstr(b'%s: No such file or directory' % oid),
87 pycompat.sysstr(b'%s: No such file or directory' % oid),
88 )
88 )
89
89
90 def walk(self, path=None, onerror=None):
90 def walk(self, path=None, onerror=None):
91 return (b'', [], [])
91 return (b'', [], [])
92
92
93 def write(self, oid, data):
93 def write(self, oid, data):
94 pass
94 pass
95
95
96
96
97 class filewithprogress(object):
97 class filewithprogress(object):
98 """a file-like object that supports __len__ and read.
98 """a file-like object that supports __len__ and read.
99
99
100 Useful to provide progress information for how many bytes are read.
100 Useful to provide progress information for how many bytes are read.
101 """
101 """
102
102
103 def __init__(self, fp, callback):
103 def __init__(self, fp, callback):
104 self._fp = fp
104 self._fp = fp
105 self._callback = callback # func(readsize)
105 self._callback = callback # func(readsize)
106 fp.seek(0, os.SEEK_END)
106 fp.seek(0, os.SEEK_END)
107 self._len = fp.tell()
107 self._len = fp.tell()
108 fp.seek(0)
108 fp.seek(0)
109
109
110 def __len__(self):
110 def __len__(self):
111 return self._len
111 return self._len
112
112
113 def read(self, size):
113 def read(self, size):
114 if self._fp is None:
114 if self._fp is None:
115 return b''
115 return b''
116 data = self._fp.read(size)
116 data = self._fp.read(size)
117 if data:
117 if data:
118 if self._callback:
118 if self._callback:
119 self._callback(len(data))
119 self._callback(len(data))
120 else:
120 else:
121 self._fp.close()
121 self._fp.close()
122 self._fp = None
122 self._fp = None
123 return data
123 return data
124
124
125
125
126 class local(object):
126 class local(object):
127 """Local blobstore for large file contents.
127 """Local blobstore for large file contents.
128
128
129 This blobstore is used both as a cache and as a staging area for large blobs
129 This blobstore is used both as a cache and as a staging area for large blobs
130 to be uploaded to the remote blobstore.
130 to be uploaded to the remote blobstore.
131 """
131 """
132
132
133 def __init__(self, repo):
133 def __init__(self, repo):
134 fullpath = repo.svfs.join(b'lfs/objects')
134 fullpath = repo.svfs.join(b'lfs/objects')
135 self.vfs = lfsvfs(fullpath)
135 self.vfs = lfsvfs(fullpath)
136
136
137 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
137 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
138 self.cachevfs = nullvfs()
138 self.cachevfs = nullvfs()
139 else:
139 else:
140 usercache = lfutil._usercachedir(repo.ui, b'lfs')
140 usercache = lfutil._usercachedir(repo.ui, b'lfs')
141 self.cachevfs = lfsvfs(usercache)
141 self.cachevfs = lfsvfs(usercache)
142 self.ui = repo.ui
142 self.ui = repo.ui
143
143
144 def open(self, oid):
144 def open(self, oid):
145 """Open a read-only file descriptor to the named blob, in either the
145 """Open a read-only file descriptor to the named blob, in either the
146 usercache or the local store."""
146 usercache or the local store."""
147 # The usercache is the most likely place to hold the file. Commit will
147 # The usercache is the most likely place to hold the file. Commit will
148 # write to both it and the local store, as will anything that downloads
148 # write to both it and the local store, as will anything that downloads
149 # the blobs. However, things like clone without an update won't
149 # the blobs. However, things like clone without an update won't
150 # populate the local store. For an init + push of a local clone,
150 # populate the local store. For an init + push of a local clone,
151 # the usercache is the only place it _could_ be. If not present, the
151 # the usercache is the only place it _could_ be. If not present, the
152 # missing file msg here will indicate the local repo, not the usercache.
152 # missing file msg here will indicate the local repo, not the usercache.
153 if self.cachevfs.exists(oid):
153 if self.cachevfs.exists(oid):
154 return self.cachevfs(oid, b'rb')
154 return self.cachevfs(oid, b'rb')
155
155
156 return self.vfs(oid, b'rb')
156 return self.vfs(oid, b'rb')
157
157
158 def download(self, oid, src):
158 def download(self, oid, src):
159 """Read the blob from the remote source in chunks, verify the content,
159 """Read the blob from the remote source in chunks, verify the content,
160 and write to this local blobstore."""
160 and write to this local blobstore."""
161 sha256 = hashlib.sha256()
161 sha256 = hashlib.sha256()
162
162
163 with self.vfs(oid, b'wb', atomictemp=True) as fp:
163 with self.vfs(oid, b'wb', atomictemp=True) as fp:
164 for chunk in util.filechunkiter(src, size=1048576):
164 for chunk in util.filechunkiter(src, size=1048576):
165 fp.write(chunk)
165 fp.write(chunk)
166 sha256.update(chunk)
166 sha256.update(chunk)
167
167
168 realoid = node.hex(sha256.digest())
168 realoid = node.hex(sha256.digest())
169 if realoid != oid:
169 if realoid != oid:
170 raise LfsCorruptionError(
170 raise LfsCorruptionError(
171 _(b'corrupt remote lfs object: %s') % oid
171 _(b'corrupt remote lfs object: %s') % oid
172 )
172 )
173
173
174 self._linktousercache(oid)
174 self._linktousercache(oid)
175
175
176 def write(self, oid, data):
176 def write(self, oid, data):
177 """Write blob to local blobstore.
177 """Write blob to local blobstore.
178
178
179 This should only be called from the filelog during a commit or similar.
179 This should only be called from the filelog during a commit or similar.
180 As such, there is no need to verify the data. Imports from a remote
180 As such, there is no need to verify the data. Imports from a remote
181 store must use ``download()`` instead."""
181 store must use ``download()`` instead."""
182 with self.vfs(oid, b'wb', atomictemp=True) as fp:
182 with self.vfs(oid, b'wb', atomictemp=True) as fp:
183 fp.write(data)
183 fp.write(data)
184
184
185 self._linktousercache(oid)
185 self._linktousercache(oid)
186
186
187 def linkfromusercache(self, oid):
187 def linkfromusercache(self, oid):
188 """Link blobs found in the user cache into this store.
188 """Link blobs found in the user cache into this store.
189
189
190 The server module needs to do this when it lets the client know not to
190 The server module needs to do this when it lets the client know not to
191 upload the blob, to ensure it is always available in this store.
191 upload the blob, to ensure it is always available in this store.
192 Normally this is done implicitly when the client reads or writes the
192 Normally this is done implicitly when the client reads or writes the
193 blob, but that doesn't happen when the server tells the client that it
193 blob, but that doesn't happen when the server tells the client that it
194 already has the blob.
194 already has the blob.
195 """
195 """
196 if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
196 if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
197 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
197 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
198 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
198 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
199
199
200 def _linktousercache(self, oid):
200 def _linktousercache(self, oid):
201 # XXX: should we verify the content of the cache, and hardlink back to
201 # XXX: should we verify the content of the cache, and hardlink back to
202 # the local store on success, but truncate, write and link on failure?
202 # the local store on success, but truncate, write and link on failure?
203 if not self.cachevfs.exists(oid) and not isinstance(
203 if not self.cachevfs.exists(oid) and not isinstance(
204 self.cachevfs, nullvfs
204 self.cachevfs, nullvfs
205 ):
205 ):
206 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
206 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
207 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
207 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
208
208
209 def read(self, oid, verify=True):
209 def read(self, oid, verify=True):
210 """Read blob from local blobstore."""
210 """Read blob from local blobstore."""
211 if not self.vfs.exists(oid):
211 if not self.vfs.exists(oid):
212 blob = self._read(self.cachevfs, oid, verify)
212 blob = self._read(self.cachevfs, oid, verify)
213
213
214 # Even if revlog will verify the content, it needs to be verified
214 # Even if revlog will verify the content, it needs to be verified
215 # now before making the hardlink to avoid propagating corrupt blobs.
215 # now before making the hardlink to avoid propagating corrupt blobs.
216 # Don't abort if corruption is detected, because `hg verify` will
216 # Don't abort if corruption is detected, because `hg verify` will
217 # give more useful info about the corruption- simply don't add the
217 # give more useful info about the corruption- simply don't add the
218 # hardlink.
218 # hardlink.
219 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
219 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
220 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
220 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
221 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
221 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
222 else:
222 else:
223 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
223 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
224 blob = self._read(self.vfs, oid, verify)
224 blob = self._read(self.vfs, oid, verify)
225 return blob
225 return blob
226
226
227 def _read(self, vfs, oid, verify):
227 def _read(self, vfs, oid, verify):
228 """Read blob (after verifying) from the given store"""
228 """Read blob (after verifying) from the given store"""
229 blob = vfs.read(oid)
229 blob = vfs.read(oid)
230 if verify:
230 if verify:
231 _verify(oid, blob)
231 _verify(oid, blob)
232 return blob
232 return blob
233
233
234 def verify(self, oid):
234 def verify(self, oid):
235 """Indicate whether or not the hash of the underlying file matches its
235 """Indicate whether or not the hash of the underlying file matches its
236 name."""
236 name."""
237 sha256 = hashlib.sha256()
237 sha256 = hashlib.sha256()
238
238
239 with self.open(oid) as fp:
239 with self.open(oid) as fp:
240 for chunk in util.filechunkiter(fp, size=1048576):
240 for chunk in util.filechunkiter(fp, size=1048576):
241 sha256.update(chunk)
241 sha256.update(chunk)
242
242
243 return oid == node.hex(sha256.digest())
243 return oid == node.hex(sha256.digest())
244
244
245 def has(self, oid):
245 def has(self, oid):
246 """Returns True if the local blobstore contains the requested blob,
246 """Returns True if the local blobstore contains the requested blob,
247 False otherwise."""
247 False otherwise."""
248 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
248 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
249
249
250
250
251 def _urlerrorreason(urlerror):
251 def _urlerrorreason(urlerror):
252 '''Create a friendly message for the given URLError to be used in an
252 '''Create a friendly message for the given URLError to be used in an
253 LfsRemoteError message.
253 LfsRemoteError message.
254 '''
254 '''
255 inst = urlerror
255 inst = urlerror
256
256
257 if isinstance(urlerror.reason, Exception):
257 if isinstance(urlerror.reason, Exception):
258 inst = urlerror.reason
258 inst = urlerror.reason
259
259
260 if util.safehasattr(inst, b'reason'):
260 if util.safehasattr(inst, b'reason'):
261 try: # usually it is in the form (errno, strerror)
261 try: # usually it is in the form (errno, strerror)
262 reason = inst.reason.args[1]
262 reason = inst.reason.args[1]
263 except (AttributeError, IndexError):
263 except (AttributeError, IndexError):
264 # it might be anything, for example a string
264 # it might be anything, for example a string
265 reason = inst.reason
265 reason = inst.reason
266 if isinstance(reason, pycompat.unicode):
266 if isinstance(reason, pycompat.unicode):
267 # SSLError of Python 2.7.9 contains a unicode
267 # SSLError of Python 2.7.9 contains a unicode
268 reason = encoding.unitolocal(reason)
268 reason = encoding.unitolocal(reason)
269 return reason
269 return reason
270 elif getattr(inst, "strerror", None):
270 elif getattr(inst, "strerror", None):
271 return encoding.strtolocal(inst.strerror)
271 return encoding.strtolocal(inst.strerror)
272 else:
272 else:
273 return stringutil.forcebytestr(urlerror)
273 return stringutil.forcebytestr(urlerror)
274
274
275
275
276 class lfsauthhandler(util.urlreq.basehandler):
276 class lfsauthhandler(util.urlreq.basehandler):
277 handler_order = 480 # Before HTTPDigestAuthHandler (== 490)
277 handler_order = 480 # Before HTTPDigestAuthHandler (== 490)
278
278
279 def http_error_401(self, req, fp, code, msg, headers):
279 def http_error_401(self, req, fp, code, msg, headers):
280 """Enforces that any authentication performed is HTTP Basic
280 """Enforces that any authentication performed is HTTP Basic
281 Authentication. No authentication is also acceptable.
281 Authentication. No authentication is also acceptable.
282 """
282 """
283 authreq = headers.get(r'www-authenticate', None)
283 authreq = headers.get(r'www-authenticate', None)
284 if authreq:
284 if authreq:
285 scheme = authreq.split()[0]
285 scheme = authreq.split()[0]
286
286
287 if scheme.lower() != r'basic':
287 if scheme.lower() != r'basic':
288 msg = _(b'the server must support Basic Authentication')
288 msg = _(b'the server must support Basic Authentication')
289 raise util.urlerr.httperror(
289 raise util.urlerr.httperror(
290 req.get_full_url(),
290 req.get_full_url(),
291 code,
291 code,
292 encoding.strfromlocal(msg),
292 encoding.strfromlocal(msg),
293 headers,
293 headers,
294 fp,
294 fp,
295 )
295 )
296 return None
296 return None
297
297
298
298
299 class _gitlfsremote(object):
299 class _gitlfsremote(object):
300 def __init__(self, repo, url):
300 def __init__(self, repo, url):
301 ui = repo.ui
301 ui = repo.ui
302 self.ui = ui
302 self.ui = ui
303 baseurl, authinfo = url.authinfo()
303 baseurl, authinfo = url.authinfo()
304 self.baseurl = baseurl.rstrip(b'/')
304 self.baseurl = baseurl.rstrip(b'/')
305 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
305 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
306 if not useragent:
306 if not useragent:
307 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
307 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
308 self.urlopener = urlmod.opener(ui, authinfo, useragent)
308 self.urlopener = urlmod.opener(ui, authinfo, useragent)
309 self.urlopener.add_handler(lfsauthhandler())
309 self.urlopener.add_handler(lfsauthhandler())
310 self.retry = ui.configint(b'lfs', b'retry')
310 self.retry = ui.configint(b'lfs', b'retry')
311
311
312 def writebatch(self, pointers, fromstore):
312 def writebatch(self, pointers, fromstore):
313 """Batch upload from local to remote blobstore."""
313 """Batch upload from local to remote blobstore."""
314 self._batch(_deduplicate(pointers), fromstore, b'upload')
314 self._batch(_deduplicate(pointers), fromstore, b'upload')
315
315
316 def readbatch(self, pointers, tostore):
316 def readbatch(self, pointers, tostore):
317 """Batch download from remote to local blostore."""
317 """Batch download from remote to local blostore."""
318 self._batch(_deduplicate(pointers), tostore, b'download')
318 self._batch(_deduplicate(pointers), tostore, b'download')
319
319
320 def _batchrequest(self, pointers, action):
320 def _batchrequest(self, pointers, action):
321 """Get metadata about objects pointed by pointers for given action
321 """Get metadata about objects pointed by pointers for given action
322
322
323 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
323 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
324 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
324 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
325 """
325 """
326 objects = [
326 objects = [
327 {r'oid': pycompat.strurl(p.oid()), r'size': p.size()}
327 {r'oid': pycompat.strurl(p.oid()), r'size': p.size()}
328 for p in pointers
328 for p in pointers
329 ]
329 ]
330 requestdata = pycompat.bytesurl(
330 requestdata = pycompat.bytesurl(
331 json.dumps(
331 json.dumps(
332 {r'objects': objects, r'operation': pycompat.strurl(action),}
332 {r'objects': objects, r'operation': pycompat.strurl(action),}
333 )
333 )
334 )
334 )
335 url = b'%s/objects/batch' % self.baseurl
335 url = b'%s/objects/batch' % self.baseurl
336 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
336 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
337 batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json')
337 batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json')
338 batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json')
338 batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json')
339 try:
339 try:
340 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
340 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
341 rawjson = rsp.read()
341 rawjson = rsp.read()
342 except util.urlerr.httperror as ex:
342 except util.urlerr.httperror as ex:
343 hints = {
343 hints = {
344 400: _(
344 400: _(
345 b'check that lfs serving is enabled on %s and "%s" is '
345 b'check that lfs serving is enabled on %s and "%s" is '
346 b'supported'
346 b'supported'
347 )
347 )
348 % (self.baseurl, action),
348 % (self.baseurl, action),
349 404: _(b'the "lfs.url" config may be used to override %s')
349 404: _(b'the "lfs.url" config may be used to override %s')
350 % self.baseurl,
350 % self.baseurl,
351 }
351 }
352 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
352 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
353 raise LfsRemoteError(
353 raise LfsRemoteError(
354 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
354 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
355 hint=hint,
355 hint=hint,
356 )
356 )
357 except util.urlerr.urlerror as ex:
357 except util.urlerr.urlerror as ex:
358 hint = (
358 hint = (
359 _(b'the "lfs.url" config may be used to override %s')
359 _(b'the "lfs.url" config may be used to override %s')
360 % self.baseurl
360 % self.baseurl
361 )
361 )
362 raise LfsRemoteError(
362 raise LfsRemoteError(
363 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
363 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
364 )
364 )
365 try:
365 try:
366 response = json.loads(rawjson)
366 response = json.loads(rawjson)
367 except ValueError:
367 except ValueError:
368 raise LfsRemoteError(
368 raise LfsRemoteError(
369 _(b'LFS server returns invalid JSON: %s')
369 _(b'LFS server returns invalid JSON: %s')
370 % rawjson.encode("utf-8")
370 % rawjson.encode("utf-8")
371 )
371 )
372
372
373 if self.ui.debugflag:
373 if self.ui.debugflag:
374 self.ui.debug(b'Status: %d\n' % rsp.status)
374 self.ui.debug(b'Status: %d\n' % rsp.status)
375 # lfs-test-server and hg serve return headers in different order
375 # lfs-test-server and hg serve return headers in different order
376 headers = pycompat.bytestr(rsp.info()).strip()
376 headers = pycompat.bytestr(rsp.info()).strip()
377 self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
377 self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
378
378
379 if r'objects' in response:
379 if r'objects' in response:
380 response[r'objects'] = sorted(
380 response[r'objects'] = sorted(
381 response[r'objects'], key=lambda p: p[r'oid']
381 response[r'objects'], key=lambda p: p[r'oid']
382 )
382 )
383 self.ui.debug(
383 self.ui.debug(
384 b'%s\n'
384 b'%s\n'
385 % pycompat.bytesurl(
385 % pycompat.bytesurl(
386 json.dumps(
386 json.dumps(
387 response,
387 response,
388 indent=2,
388 indent=2,
389 separators=(r'', r': '),
389 separators=(r'', r': '),
390 sort_keys=True,
390 sort_keys=True,
391 )
391 )
392 )
392 )
393 )
393 )
394
394
395 def encodestr(x):
395 def encodestr(x):
396 if isinstance(x, pycompat.unicode):
396 if isinstance(x, pycompat.unicode):
397 return x.encode(u'utf-8')
397 return x.encode('utf-8')
398 return x
398 return x
399
399
400 return pycompat.rapply(encodestr, response)
400 return pycompat.rapply(encodestr, response)
401
401
402 def _checkforservererror(self, pointers, responses, action):
402 def _checkforservererror(self, pointers, responses, action):
403 """Scans errors from objects
403 """Scans errors from objects
404
404
405 Raises LfsRemoteError if any objects have an error"""
405 Raises LfsRemoteError if any objects have an error"""
406 for response in responses:
406 for response in responses:
407 # The server should return 404 when objects cannot be found. Some
407 # The server should return 404 when objects cannot be found. Some
408 # server implementation (ex. lfs-test-server) does not set "error"
408 # server implementation (ex. lfs-test-server) does not set "error"
409 # but just removes "download" from "actions". Treat that case
409 # but just removes "download" from "actions". Treat that case
410 # as the same as 404 error.
410 # as the same as 404 error.
411 if b'error' not in response:
411 if b'error' not in response:
412 if action == b'download' and action not in response.get(
412 if action == b'download' and action not in response.get(
413 b'actions', []
413 b'actions', []
414 ):
414 ):
415 code = 404
415 code = 404
416 else:
416 else:
417 continue
417 continue
418 else:
418 else:
419 # An error dict without a code doesn't make much sense, so
419 # An error dict without a code doesn't make much sense, so
420 # treat as a server error.
420 # treat as a server error.
421 code = response.get(b'error').get(b'code', 500)
421 code = response.get(b'error').get(b'code', 500)
422
422
423 ptrmap = {p.oid(): p for p in pointers}
423 ptrmap = {p.oid(): p for p in pointers}
424 p = ptrmap.get(response[b'oid'], None)
424 p = ptrmap.get(response[b'oid'], None)
425 if p:
425 if p:
426 filename = getattr(p, 'filename', b'unknown')
426 filename = getattr(p, 'filename', b'unknown')
427 errors = {
427 errors = {
428 404: b'The object does not exist',
428 404: b'The object does not exist',
429 410: b'The object was removed by the owner',
429 410: b'The object was removed by the owner',
430 422: b'Validation error',
430 422: b'Validation error',
431 500: b'Internal server error',
431 500: b'Internal server error',
432 }
432 }
433 msg = errors.get(code, b'status code %d' % code)
433 msg = errors.get(code, b'status code %d' % code)
434 raise LfsRemoteError(
434 raise LfsRemoteError(
435 _(b'LFS server error for "%s": %s') % (filename, msg)
435 _(b'LFS server error for "%s": %s') % (filename, msg)
436 )
436 )
437 else:
437 else:
438 raise LfsRemoteError(
438 raise LfsRemoteError(
439 _(b'LFS server error. Unsolicited response for oid %s')
439 _(b'LFS server error. Unsolicited response for oid %s')
440 % response[b'oid']
440 % response[b'oid']
441 )
441 )
442
442
443 def _extractobjects(self, response, pointers, action):
443 def _extractobjects(self, response, pointers, action):
444 """extract objects from response of the batch API
444 """extract objects from response of the batch API
445
445
446 response: parsed JSON object returned by batch API
446 response: parsed JSON object returned by batch API
447 return response['objects'] filtered by action
447 return response['objects'] filtered by action
448 raise if any object has an error
448 raise if any object has an error
449 """
449 """
450 # Scan errors from objects - fail early
450 # Scan errors from objects - fail early
451 objects = response.get(b'objects', [])
451 objects = response.get(b'objects', [])
452 self._checkforservererror(pointers, objects, action)
452 self._checkforservererror(pointers, objects, action)
453
453
454 # Filter objects with given action. Practically, this skips uploading
454 # Filter objects with given action. Practically, this skips uploading
455 # objects which exist in the server.
455 # objects which exist in the server.
456 filteredobjects = [
456 filteredobjects = [
457 o for o in objects if action in o.get(b'actions', [])
457 o for o in objects if action in o.get(b'actions', [])
458 ]
458 ]
459
459
460 return filteredobjects
460 return filteredobjects
461
461
462 def _basictransfer(self, obj, action, localstore):
462 def _basictransfer(self, obj, action, localstore):
463 """Download or upload a single object using basic transfer protocol
463 """Download or upload a single object using basic transfer protocol
464
464
465 obj: dict, an object description returned by batch API
465 obj: dict, an object description returned by batch API
466 action: string, one of ['upload', 'download']
466 action: string, one of ['upload', 'download']
467 localstore: blobstore.local
467 localstore: blobstore.local
468
468
469 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
469 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
470 basic-transfers.md
470 basic-transfers.md
471 """
471 """
472 oid = obj[b'oid']
472 oid = obj[b'oid']
473 href = obj[b'actions'][action].get(b'href')
473 href = obj[b'actions'][action].get(b'href')
474 headers = obj[b'actions'][action].get(b'header', {}).items()
474 headers = obj[b'actions'][action].get(b'header', {}).items()
475
475
476 request = util.urlreq.request(pycompat.strurl(href))
476 request = util.urlreq.request(pycompat.strurl(href))
477 if action == b'upload':
477 if action == b'upload':
478 # If uploading blobs, read data from local blobstore.
478 # If uploading blobs, read data from local blobstore.
479 if not localstore.verify(oid):
479 if not localstore.verify(oid):
480 raise error.Abort(
480 raise error.Abort(
481 _(b'detected corrupt lfs object: %s') % oid,
481 _(b'detected corrupt lfs object: %s') % oid,
482 hint=_(b'run hg verify'),
482 hint=_(b'run hg verify'),
483 )
483 )
484 request.data = filewithprogress(localstore.open(oid), None)
484 request.data = filewithprogress(localstore.open(oid), None)
485 request.get_method = lambda: r'PUT'
485 request.get_method = lambda: r'PUT'
486 request.add_header(r'Content-Type', r'application/octet-stream')
486 request.add_header(r'Content-Type', r'application/octet-stream')
487 request.add_header(r'Content-Length', len(request.data))
487 request.add_header(r'Content-Length', len(request.data))
488
488
489 for k, v in headers:
489 for k, v in headers:
490 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
490 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
491
491
492 response = b''
492 response = b''
493 try:
493 try:
494 with contextlib.closing(self.urlopener.open(request)) as req:
494 with contextlib.closing(self.urlopener.open(request)) as req:
495 ui = self.ui # Shorten debug lines
495 ui = self.ui # Shorten debug lines
496 if self.ui.debugflag:
496 if self.ui.debugflag:
497 ui.debug(b'Status: %d\n' % req.status)
497 ui.debug(b'Status: %d\n' % req.status)
498 # lfs-test-server and hg serve return headers in different
498 # lfs-test-server and hg serve return headers in different
499 # order
499 # order
500 headers = pycompat.bytestr(req.info()).strip()
500 headers = pycompat.bytestr(req.info()).strip()
501 ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
501 ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
502
502
503 if action == b'download':
503 if action == b'download':
504 # If downloading blobs, store downloaded data to local
504 # If downloading blobs, store downloaded data to local
505 # blobstore
505 # blobstore
506 localstore.download(oid, req)
506 localstore.download(oid, req)
507 else:
507 else:
508 while True:
508 while True:
509 data = req.read(1048576)
509 data = req.read(1048576)
510 if not data:
510 if not data:
511 break
511 break
512 response += data
512 response += data
513 if response:
513 if response:
514 ui.debug(b'lfs %s response: %s' % (action, response))
514 ui.debug(b'lfs %s response: %s' % (action, response))
515 except util.urlerr.httperror as ex:
515 except util.urlerr.httperror as ex:
516 if self.ui.debugflag:
516 if self.ui.debugflag:
517 self.ui.debug(
517 self.ui.debug(
518 b'%s: %s\n' % (oid, ex.read())
518 b'%s: %s\n' % (oid, ex.read())
519 ) # XXX: also bytes?
519 ) # XXX: also bytes?
520 raise LfsRemoteError(
520 raise LfsRemoteError(
521 _(b'LFS HTTP error: %s (oid=%s, action=%s)')
521 _(b'LFS HTTP error: %s (oid=%s, action=%s)')
522 % (stringutil.forcebytestr(ex), oid, action)
522 % (stringutil.forcebytestr(ex), oid, action)
523 )
523 )
524 except util.urlerr.urlerror as ex:
524 except util.urlerr.urlerror as ex:
525 hint = _(b'attempted connection to %s') % pycompat.bytesurl(
525 hint = _(b'attempted connection to %s') % pycompat.bytesurl(
526 util.urllibcompat.getfullurl(request)
526 util.urllibcompat.getfullurl(request)
527 )
527 )
528 raise LfsRemoteError(
528 raise LfsRemoteError(
529 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
529 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
530 )
530 )
531
531
532 def _batch(self, pointers, localstore, action):
532 def _batch(self, pointers, localstore, action):
533 if action not in [b'upload', b'download']:
533 if action not in [b'upload', b'download']:
534 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
534 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
535
535
536 response = self._batchrequest(pointers, action)
536 response = self._batchrequest(pointers, action)
537 objects = self._extractobjects(response, pointers, action)
537 objects = self._extractobjects(response, pointers, action)
538 total = sum(x.get(b'size', 0) for x in objects)
538 total = sum(x.get(b'size', 0) for x in objects)
539 sizes = {}
539 sizes = {}
540 for obj in objects:
540 for obj in objects:
541 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
541 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
542 topic = {
542 topic = {
543 b'upload': _(b'lfs uploading'),
543 b'upload': _(b'lfs uploading'),
544 b'download': _(b'lfs downloading'),
544 b'download': _(b'lfs downloading'),
545 }[action]
545 }[action]
546 if len(objects) > 1:
546 if len(objects) > 1:
547 self.ui.note(
547 self.ui.note(
548 _(b'lfs: need to transfer %d objects (%s)\n')
548 _(b'lfs: need to transfer %d objects (%s)\n')
549 % (len(objects), util.bytecount(total))
549 % (len(objects), util.bytecount(total))
550 )
550 )
551
551
552 def transfer(chunk):
552 def transfer(chunk):
553 for obj in chunk:
553 for obj in chunk:
554 objsize = obj.get(b'size', 0)
554 objsize = obj.get(b'size', 0)
555 if self.ui.verbose:
555 if self.ui.verbose:
556 if action == b'download':
556 if action == b'download':
557 msg = _(b'lfs: downloading %s (%s)\n')
557 msg = _(b'lfs: downloading %s (%s)\n')
558 elif action == b'upload':
558 elif action == b'upload':
559 msg = _(b'lfs: uploading %s (%s)\n')
559 msg = _(b'lfs: uploading %s (%s)\n')
560 self.ui.note(
560 self.ui.note(
561 msg % (obj.get(b'oid'), util.bytecount(objsize))
561 msg % (obj.get(b'oid'), util.bytecount(objsize))
562 )
562 )
563 retry = self.retry
563 retry = self.retry
564 while True:
564 while True:
565 try:
565 try:
566 self._basictransfer(obj, action, localstore)
566 self._basictransfer(obj, action, localstore)
567 yield 1, obj.get(b'oid')
567 yield 1, obj.get(b'oid')
568 break
568 break
569 except socket.error as ex:
569 except socket.error as ex:
570 if retry > 0:
570 if retry > 0:
571 self.ui.note(
571 self.ui.note(
572 _(b'lfs: failed: %r (remaining retry %d)\n')
572 _(b'lfs: failed: %r (remaining retry %d)\n')
573 % (stringutil.forcebytestr(ex), retry)
573 % (stringutil.forcebytestr(ex), retry)
574 )
574 )
575 retry -= 1
575 retry -= 1
576 continue
576 continue
577 raise
577 raise
578
578
579 # Until https multiplexing gets sorted out
579 # Until https multiplexing gets sorted out
580 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
580 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
581 oids = worker.worker(
581 oids = worker.worker(
582 self.ui,
582 self.ui,
583 0.1,
583 0.1,
584 transfer,
584 transfer,
585 (),
585 (),
586 sorted(objects, key=lambda o: o.get(b'oid')),
586 sorted(objects, key=lambda o: o.get(b'oid')),
587 )
587 )
588 else:
588 else:
589 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
589 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
590
590
591 with self.ui.makeprogress(topic, total=total) as progress:
591 with self.ui.makeprogress(topic, total=total) as progress:
592 progress.update(0)
592 progress.update(0)
593 processed = 0
593 processed = 0
594 blobs = 0
594 blobs = 0
595 for _one, oid in oids:
595 for _one, oid in oids:
596 processed += sizes[oid]
596 processed += sizes[oid]
597 blobs += 1
597 blobs += 1
598 progress.update(processed)
598 progress.update(processed)
599 self.ui.note(_(b'lfs: processed: %s\n') % oid)
599 self.ui.note(_(b'lfs: processed: %s\n') % oid)
600
600
601 if blobs > 0:
601 if blobs > 0:
602 if action == b'upload':
602 if action == b'upload':
603 self.ui.status(
603 self.ui.status(
604 _(b'lfs: uploaded %d files (%s)\n')
604 _(b'lfs: uploaded %d files (%s)\n')
605 % (blobs, util.bytecount(processed))
605 % (blobs, util.bytecount(processed))
606 )
606 )
607 elif action == b'download':
607 elif action == b'download':
608 self.ui.status(
608 self.ui.status(
609 _(b'lfs: downloaded %d files (%s)\n')
609 _(b'lfs: downloaded %d files (%s)\n')
610 % (blobs, util.bytecount(processed))
610 % (blobs, util.bytecount(processed))
611 )
611 )
612
612
613 def __del__(self):
613 def __del__(self):
614 # copied from mercurial/httppeer.py
614 # copied from mercurial/httppeer.py
615 urlopener = getattr(self, 'urlopener', None)
615 urlopener = getattr(self, 'urlopener', None)
616 if urlopener:
616 if urlopener:
617 for h in urlopener.handlers:
617 for h in urlopener.handlers:
618 h.close()
618 h.close()
619 getattr(h, "close_all", lambda: None)()
619 getattr(h, "close_all", lambda: None)()
620
620
621
621
622 class _dummyremote(object):
622 class _dummyremote(object):
623 """Dummy store storing blobs to temp directory."""
623 """Dummy store storing blobs to temp directory."""
624
624
625 def __init__(self, repo, url):
625 def __init__(self, repo, url):
626 fullpath = repo.vfs.join(b'lfs', url.path)
626 fullpath = repo.vfs.join(b'lfs', url.path)
627 self.vfs = lfsvfs(fullpath)
627 self.vfs = lfsvfs(fullpath)
628
628
629 def writebatch(self, pointers, fromstore):
629 def writebatch(self, pointers, fromstore):
630 for p in _deduplicate(pointers):
630 for p in _deduplicate(pointers):
631 content = fromstore.read(p.oid(), verify=True)
631 content = fromstore.read(p.oid(), verify=True)
632 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
632 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
633 fp.write(content)
633 fp.write(content)
634
634
635 def readbatch(self, pointers, tostore):
635 def readbatch(self, pointers, tostore):
636 for p in _deduplicate(pointers):
636 for p in _deduplicate(pointers):
637 with self.vfs(p.oid(), b'rb') as fp:
637 with self.vfs(p.oid(), b'rb') as fp:
638 tostore.download(p.oid(), fp)
638 tostore.download(p.oid(), fp)
639
639
640
640
641 class _nullremote(object):
641 class _nullremote(object):
642 """Null store storing blobs to /dev/null."""
642 """Null store storing blobs to /dev/null."""
643
643
644 def __init__(self, repo, url):
644 def __init__(self, repo, url):
645 pass
645 pass
646
646
647 def writebatch(self, pointers, fromstore):
647 def writebatch(self, pointers, fromstore):
648 pass
648 pass
649
649
650 def readbatch(self, pointers, tostore):
650 def readbatch(self, pointers, tostore):
651 pass
651 pass
652
652
653
653
654 class _promptremote(object):
654 class _promptremote(object):
655 """Prompt user to set lfs.url when accessed."""
655 """Prompt user to set lfs.url when accessed."""
656
656
657 def __init__(self, repo, url):
657 def __init__(self, repo, url):
658 pass
658 pass
659
659
660 def writebatch(self, pointers, fromstore, ui=None):
660 def writebatch(self, pointers, fromstore, ui=None):
661 self._prompt()
661 self._prompt()
662
662
663 def readbatch(self, pointers, tostore, ui=None):
663 def readbatch(self, pointers, tostore, ui=None):
664 self._prompt()
664 self._prompt()
665
665
666 def _prompt(self):
666 def _prompt(self):
667 raise error.Abort(_(b'lfs.url needs to be configured'))
667 raise error.Abort(_(b'lfs.url needs to be configured'))
668
668
669
669
670 _storemap = {
670 _storemap = {
671 b'https': _gitlfsremote,
671 b'https': _gitlfsremote,
672 b'http': _gitlfsremote,
672 b'http': _gitlfsremote,
673 b'file': _dummyremote,
673 b'file': _dummyremote,
674 b'null': _nullremote,
674 b'null': _nullremote,
675 None: _promptremote,
675 None: _promptremote,
676 }
676 }
677
677
678
678
679 def _deduplicate(pointers):
679 def _deduplicate(pointers):
680 """Remove any duplicate oids that exist in the list"""
680 """Remove any duplicate oids that exist in the list"""
681 reduced = util.sortdict()
681 reduced = util.sortdict()
682 for p in pointers:
682 for p in pointers:
683 reduced[p.oid()] = p
683 reduced[p.oid()] = p
684 return reduced.values()
684 return reduced.values()
685
685
686
686
687 def _verify(oid, content):
687 def _verify(oid, content):
688 realoid = node.hex(hashlib.sha256(content).digest())
688 realoid = node.hex(hashlib.sha256(content).digest())
689 if realoid != oid:
689 if realoid != oid:
690 raise LfsCorruptionError(
690 raise LfsCorruptionError(
691 _(b'detected corrupt lfs object: %s') % oid,
691 _(b'detected corrupt lfs object: %s') % oid,
692 hint=_(b'run hg verify'),
692 hint=_(b'run hg verify'),
693 )
693 )
694
694
695
695
696 def remote(repo, remote=None):
696 def remote(repo, remote=None):
697 """remotestore factory. return a store in _storemap depending on config
697 """remotestore factory. return a store in _storemap depending on config
698
698
699 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
699 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
700 infer the endpoint, based on the remote repository using the same path
700 infer the endpoint, based on the remote repository using the same path
701 adjustments as git. As an extension, 'http' is supported as well so that
701 adjustments as git. As an extension, 'http' is supported as well so that
702 ``hg serve`` works out of the box.
702 ``hg serve`` works out of the box.
703
703
704 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
704 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
705 """
705 """
706 lfsurl = repo.ui.config(b'lfs', b'url')
706 lfsurl = repo.ui.config(b'lfs', b'url')
707 url = util.url(lfsurl or b'')
707 url = util.url(lfsurl or b'')
708 if lfsurl is None:
708 if lfsurl is None:
709 if remote:
709 if remote:
710 path = remote
710 path = remote
711 elif util.safehasattr(repo, b'_subtoppath'):
711 elif util.safehasattr(repo, b'_subtoppath'):
712 # The pull command sets this during the optional update phase, which
712 # The pull command sets this during the optional update phase, which
713 # tells exactly where the pull originated, whether 'paths.default'
713 # tells exactly where the pull originated, whether 'paths.default'
714 # or explicit.
714 # or explicit.
715 path = repo._subtoppath
715 path = repo._subtoppath
716 else:
716 else:
717 # TODO: investigate 'paths.remote:lfsurl' style path customization,
717 # TODO: investigate 'paths.remote:lfsurl' style path customization,
718 # and fall back to inferring from 'paths.remote' if unspecified.
718 # and fall back to inferring from 'paths.remote' if unspecified.
719 path = repo.ui.config(b'paths', b'default') or b''
719 path = repo.ui.config(b'paths', b'default') or b''
720
720
721 defaulturl = util.url(path)
721 defaulturl = util.url(path)
722
722
723 # TODO: support local paths as well.
723 # TODO: support local paths as well.
724 # TODO: consider the ssh -> https transformation that git applies
724 # TODO: consider the ssh -> https transformation that git applies
725 if defaulturl.scheme in (b'http', b'https'):
725 if defaulturl.scheme in (b'http', b'https'):
726 if defaulturl.path and defaulturl.path[:-1] != b'/':
726 if defaulturl.path and defaulturl.path[:-1] != b'/':
727 defaulturl.path += b'/'
727 defaulturl.path += b'/'
728 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
728 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
729
729
730 url = util.url(bytes(defaulturl))
730 url = util.url(bytes(defaulturl))
731 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
731 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
732
732
733 scheme = url.scheme
733 scheme = url.scheme
734 if scheme not in _storemap:
734 if scheme not in _storemap:
735 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
735 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
736 return _storemap[scheme](repo, url)
736 return _storemap[scheme](repo, url)
737
737
738
738
739 class LfsRemoteError(error.StorageError):
739 class LfsRemoteError(error.StorageError):
740 pass
740 pass
741
741
742
742
743 class LfsCorruptionError(error.Abort):
743 class LfsCorruptionError(error.Abort):
744 """Raised when a corrupt blob is detected, aborting an operation
744 """Raised when a corrupt blob is detected, aborting an operation
745
745
746 It exists to allow specialized handling on the server side."""
746 It exists to allow specialized handling on the server side."""
@@ -1,267 +1,259
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # __init__.py - Startup and module loading logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import sys
10 import sys
11
11
12 # Allow 'from mercurial import demandimport' to keep working.
12 # Allow 'from mercurial import demandimport' to keep working.
13 import hgdemandimport
13 import hgdemandimport
14
14
15 demandimport = hgdemandimport
15 demandimport = hgdemandimport
16
16
17 __all__ = []
17 __all__ = []
18
18
19 # Python 3 uses a custom module loader that transforms source code between
19 # Python 3 uses a custom module loader that transforms source code between
20 # source file reading and compilation. This is done by registering a custom
20 # source file reading and compilation. This is done by registering a custom
21 # finder that changes the spec for Mercurial modules to use a custom loader.
21 # finder that changes the spec for Mercurial modules to use a custom loader.
22 if sys.version_info[0] >= 3:
22 if sys.version_info[0] >= 3:
23 import importlib
23 import importlib
24 import importlib.abc
24 import importlib.abc
25 import io
25 import io
26 import token
26 import token
27 import tokenize
27 import tokenize
28
28
29 class hgpathentryfinder(importlib.abc.MetaPathFinder):
29 class hgpathentryfinder(importlib.abc.MetaPathFinder):
30 """A sys.meta_path finder that uses a custom module loader."""
30 """A sys.meta_path finder that uses a custom module loader."""
31
31
32 def find_spec(self, fullname, path, target=None):
32 def find_spec(self, fullname, path, target=None):
33 # Only handle Mercurial-related modules.
33 # Only handle Mercurial-related modules.
34 if not fullname.startswith(('mercurial.', 'hgext.')):
34 if not fullname.startswith(('mercurial.', 'hgext.')):
35 return None
35 return None
36 # don't try to parse binary
36 # don't try to parse binary
37 if fullname.startswith('mercurial.cext.'):
37 if fullname.startswith('mercurial.cext.'):
38 return None
38 return None
39 # third-party packages are expected to be dual-version clean
39 # third-party packages are expected to be dual-version clean
40 if fullname.startswith('mercurial.thirdparty'):
40 if fullname.startswith('mercurial.thirdparty'):
41 return None
41 return None
42 # zstd is already dual-version clean, don't try and mangle it
42 # zstd is already dual-version clean, don't try and mangle it
43 if fullname.startswith('mercurial.zstd'):
43 if fullname.startswith('mercurial.zstd'):
44 return None
44 return None
45 # rustext is built for the right python version,
45 # rustext is built for the right python version,
46 # don't try and mangle it
46 # don't try and mangle it
47 if fullname.startswith('mercurial.rustext'):
47 if fullname.startswith('mercurial.rustext'):
48 return None
48 return None
49 # pywatchman is already dual-version clean, don't try and mangle it
49 # pywatchman is already dual-version clean, don't try and mangle it
50 if fullname.startswith('hgext.fsmonitor.pywatchman'):
50 if fullname.startswith('hgext.fsmonitor.pywatchman'):
51 return None
51 return None
52
52
53 # Try to find the module using other registered finders.
53 # Try to find the module using other registered finders.
54 spec = None
54 spec = None
55 for finder in sys.meta_path:
55 for finder in sys.meta_path:
56 if finder == self:
56 if finder == self:
57 continue
57 continue
58
58
59 # Originally the API was a `find_module` method, but it was
59 # Originally the API was a `find_module` method, but it was
60 # renamed to `find_spec` in python 3.4, with a new `target`
60 # renamed to `find_spec` in python 3.4, with a new `target`
61 # argument.
61 # argument.
62 find_spec_method = getattr(finder, 'find_spec', None)
62 find_spec_method = getattr(finder, 'find_spec', None)
63 if find_spec_method:
63 if find_spec_method:
64 spec = find_spec_method(fullname, path, target=target)
64 spec = find_spec_method(fullname, path, target=target)
65 else:
65 else:
66 spec = finder.find_module(fullname)
66 spec = finder.find_module(fullname)
67 if spec is not None:
67 if spec is not None:
68 spec = importlib.util.spec_from_loader(fullname, spec)
68 spec = importlib.util.spec_from_loader(fullname, spec)
69 if spec:
69 if spec:
70 break
70 break
71
71
72 # This is a Mercurial-related module but we couldn't find it
72 # This is a Mercurial-related module but we couldn't find it
73 # using the previously-registered finders. This likely means
73 # using the previously-registered finders. This likely means
74 # the module doesn't exist.
74 # the module doesn't exist.
75 if not spec:
75 if not spec:
76 return None
76 return None
77
77
78 # TODO need to support loaders from alternate specs, like zip
78 # TODO need to support loaders from alternate specs, like zip
79 # loaders.
79 # loaders.
80 loader = hgloader(spec.name, spec.origin)
80 loader = hgloader(spec.name, spec.origin)
81 # Can't use util.safehasattr here because that would require
81 # Can't use util.safehasattr here because that would require
82 # importing util, and we're in import code.
82 # importing util, and we're in import code.
83 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
83 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
84 # This is a nested loader (maybe a lazy loader?)
84 # This is a nested loader (maybe a lazy loader?)
85 spec.loader.loader = loader
85 spec.loader.loader = loader
86 else:
86 else:
87 spec.loader = loader
87 spec.loader = loader
88 return spec
88 return spec
89
89
90 def replacetokens(tokens, fullname):
90 def replacetokens(tokens, fullname):
91 """Transform a stream of tokens from raw to Python 3.
91 """Transform a stream of tokens from raw to Python 3.
92
92
93 It is called by the custom module loading machinery to rewrite
93 It is called by the custom module loading machinery to rewrite
94 source/tokens between source decoding and compilation.
94 source/tokens between source decoding and compilation.
95
95
96 Returns a generator of possibly rewritten tokens.
96 Returns a generator of possibly rewritten tokens.
97
97
98 The input token list may be mutated as part of processing. However,
98 The input token list may be mutated as part of processing. However,
99 its changes do not necessarily match the output token stream.
99 its changes do not necessarily match the output token stream.
100
100
101 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
101 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
102 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
102 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
103 """
103 """
104 # The following utility functions access the tokens list and i index of
104 # The following utility functions access the tokens list and i index of
105 # the for i, t enumerate(tokens) loop below
105 # the for i, t enumerate(tokens) loop below
106 def _isop(j, *o):
106 def _isop(j, *o):
107 """Assert that tokens[j] is an OP with one of the given values"""
107 """Assert that tokens[j] is an OP with one of the given values"""
108 try:
108 try:
109 return tokens[j].type == token.OP and tokens[j].string in o
109 return tokens[j].type == token.OP and tokens[j].string in o
110 except IndexError:
110 except IndexError:
111 return False
111 return False
112
112
113 def _findargnofcall(n):
113 def _findargnofcall(n):
114 """Find arg n of a call expression (start at 0)
114 """Find arg n of a call expression (start at 0)
115
115
116 Returns index of the first token of that argument, or None if
116 Returns index of the first token of that argument, or None if
117 there is not that many arguments.
117 there is not that many arguments.
118
118
119 Assumes that token[i + 1] is '('.
119 Assumes that token[i + 1] is '('.
120
120
121 """
121 """
122 nested = 0
122 nested = 0
123 for j in range(i + 2, len(tokens)):
123 for j in range(i + 2, len(tokens)):
124 if _isop(j, ')', ']', '}'):
124 if _isop(j, ')', ']', '}'):
125 # end of call, tuple, subscription or dict / set
125 # end of call, tuple, subscription or dict / set
126 nested -= 1
126 nested -= 1
127 if nested < 0:
127 if nested < 0:
128 return None
128 return None
129 elif n == 0:
129 elif n == 0:
130 # this is the starting position of arg
130 # this is the starting position of arg
131 return j
131 return j
132 elif _isop(j, '(', '[', '{'):
132 elif _isop(j, '(', '[', '{'):
133 nested += 1
133 nested += 1
134 elif _isop(j, ',') and nested == 0:
134 elif _isop(j, ',') and nested == 0:
135 n -= 1
135 n -= 1
136
136
137 return None
137 return None
138
138
139 def _ensureunicode(j):
139 def _ensureunicode(j):
140 """Make sure the token at j is a unicode string
140 """Make sure the token at j is a unicode string
141
141
142 This rewrites a string token to include the unicode literal prefix
142 This rewrites a string token to include the unicode literal prefix
143 so the string transformer won't add the byte prefix.
143 so the string transformer won't add the byte prefix.
144
144
145 Ignores tokens that are not strings. Assumes bounds checking has
145 Ignores tokens that are not strings. Assumes bounds checking has
146 already been done.
146 already been done.
147
147
148 """
148 """
149 st = tokens[j]
149 st = tokens[j]
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
151 tokens[j] = st._replace(string='u%s' % st.string)
151 tokens[j] = st._replace(string='u%s' % st.string)
152
152
153 for i, t in enumerate(tokens):
153 for i, t in enumerate(tokens):
154 # This looks like a function call.
154 # This looks like a function call.
155 if t.type == token.NAME and _isop(i + 1, '('):
155 if t.type == token.NAME and _isop(i + 1, '('):
156 fn = t.string
156 fn = t.string
157
157
158 # *attr() builtins don't accept byte strings to 2nd argument.
158 # *attr() builtins don't accept byte strings to 2nd argument.
159 if fn in (
159 if fn in (
160 'getattr',
160 'getattr',
161 'setattr',
161 'setattr',
162 'hasattr',
162 'hasattr',
163 'safehasattr',
163 'safehasattr',
164 ) and not _isop(i - 1, '.'):
164 ) and not _isop(i - 1, '.'):
165 arg1idx = _findargnofcall(1)
165 arg1idx = _findargnofcall(1)
166 if arg1idx is not None:
166 if arg1idx is not None:
167 _ensureunicode(arg1idx)
167 _ensureunicode(arg1idx)
168
168
169 # .encode() and .decode() on str/bytes/unicode don't accept
170 # byte strings on Python 3.
171 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
172 for argn in range(2):
173 argidx = _findargnofcall(argn)
174 if argidx is not None:
175 _ensureunicode(argidx)
176
177 # It changes iteritems/values to items/values as they are not
169 # It changes iteritems/values to items/values as they are not
178 # present in Python 3 world.
170 # present in Python 3 world.
179 elif fn in ('iteritems', 'itervalues') and not (
171 elif fn in ('iteritems', 'itervalues') and not (
180 tokens[i - 1].type == token.NAME
172 tokens[i - 1].type == token.NAME
181 and tokens[i - 1].string == 'def'
173 and tokens[i - 1].string == 'def'
182 ):
174 ):
183 yield t._replace(string=fn[4:])
175 yield t._replace(string=fn[4:])
184 continue
176 continue
185
177
186 # Emit unmodified token.
178 # Emit unmodified token.
187 yield t
179 yield t
188
180
# Header prepended to cached bytecode files. This MUST be changed whenever
# ``replacetokens`` or any other mechanism affecting module-loading
# semantics changes; otherwise bytecode cached under the old
# transformation could be loaded without the new one applied.
BYTECODEHEADER = b'HG\x00\x13'
194
186
class hgloader(importlib.machinery.SourceFileLoader):
    """Custom module loader that transforms source code.

    When source is converted to a code object, certain patterns are
    rewritten in the token stream to be Python 3 compatible, letting the
    codebase stay natively Python 2 without excessive ugliness.

    Because the transformation changes module-loading semantics, the
    built-in bytecode cache (which only keys on the Python/bytecode
    version) is not sufficient on its own: the Mercurial transformations
    can change at any time, so cached bytecode must be checked against the
    current transformation code.  We therefore wrap ``get_data`` and
    ``set_data`` — called when ``SourceFileLoader`` reads and writes
    bytecode cache files — to prepend an ``HG<VERSION>`` header (a literal
    ``HG`` plus 2 binary version bytes).  Bumping the version in
    ``BYTECODEHEADER`` whenever semantics change invalidates stale caches.
    """

    def get_data(self, path):
        """Read *path*; validate and strip our header on bytecode files."""
        payload = super(hgloader, self).get_data(path)

        if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            return payload

        # ``SourceFileLoader.get_code()`` interprets OSError as "the
        # cached file is bad", which is exactly what we want for a
        # missing or version-mismatched Mercurial transformation header.
        if payload[0:2] != b'HG':
            raise OSError('no hg header')
        if payload[0:4] != BYTECODEHEADER:
            raise OSError('hg header version mismatch')

        return payload[4:]

    def set_data(self, path, data, *args, **kwargs):
        """Prepend our header before a bytecode cache file is written."""
        if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        stream = io.BytesIO(data)
        tokens = tokenize.tokenize(stream.readline)
        data = tokenize.untokenize(replacetokens(list(tokens), self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code, and that mechanism isn't extensible, so our frame
        # will be blamed for import failures.  Frame-stripping workarounds
        # exist but are too hacky/ugly to be worth it.
        return super(hgloader, self).source_to_code(data, path)
260
252
# Register our custom importer as a side effect of loading this module.
# That way any entry point can import mercurial.* modules without having
# to perform the registration itself.
if not any(isinstance(entry, hgpathentryfinder) for entry in sys.meta_path):
    # meta_path is consulted before any implicit finders and before
    # sys.path, so insert at the front.
    sys.meta_path.insert(0, hgpathentryfinder())
@@ -1,453 +1,453
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import os
15 import os
16 import shlex
16 import shlex
17 import sys
17 import sys
18 import tempfile
18 import tempfile
19
19
20 ispy3 = sys.version_info[0] >= 3
20 ispy3 = sys.version_info[0] >= 3
21 ispypy = r'__pypy__' in sys.builtin_module_names
21 ispypy = r'__pypy__' in sys.builtin_module_names
22
22
23 if not ispy3:
23 if not ispy3:
24 import cookielib
24 import cookielib
25 import cPickle as pickle
25 import cPickle as pickle
26 import httplib
26 import httplib
27 import Queue as queue
27 import Queue as queue
28 import SocketServer as socketserver
28 import SocketServer as socketserver
29 import xmlrpclib
29 import xmlrpclib
30
30
31 from .thirdparty.concurrent import futures
31 from .thirdparty.concurrent import futures
32
32
33 def future_set_exception_info(f, exc_info):
33 def future_set_exception_info(f, exc_info):
34 f.set_exception_info(*exc_info)
34 f.set_exception_info(*exc_info)
35
35
36
36
37 else:
37 else:
38 import concurrent.futures as futures
38 import concurrent.futures as futures
39 import http.cookiejar as cookielib
39 import http.cookiejar as cookielib
40 import http.client as httplib
40 import http.client as httplib
41 import pickle
41 import pickle
42 import queue as queue
42 import queue as queue
43 import socketserver
43 import socketserver
44 import xmlrpc.client as xmlrpclib
44 import xmlrpc.client as xmlrpclib
45
45
46 def future_set_exception_info(f, exc_info):
46 def future_set_exception_info(f, exc_info):
47 f.set_exception(exc_info[0])
47 f.set_exception(exc_info[0])
48
48
49
49
def identity(a):
    """Return the argument unchanged (a no-op conversion function)."""
    return a
52
52
53
53
54 def _rapply(f, xs):
54 def _rapply(f, xs):
55 if xs is None:
55 if xs is None:
56 # assume None means non-value of optional data
56 # assume None means non-value of optional data
57 return xs
57 return xs
58 if isinstance(xs, (list, set, tuple)):
58 if isinstance(xs, (list, set, tuple)):
59 return type(xs)(_rapply(f, x) for x in xs)
59 return type(xs)(_rapply(f, x) for x in xs)
60 if isinstance(xs, dict):
60 if isinstance(xs, dict):
61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 return f(xs)
62 return f(xs)
63
63
64
64
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # Shortcut: identity converts nothing, so return the original
        # object as-is (mainly a Python 2 fast path).
        return xs
    return _rapply(f, xs)
87
87
88
88
89 if ispy3:
89 if ispy3:
90 import builtins
90 import builtins
91 import functools
91 import functools
92 import io
92 import io
93 import struct
93 import struct
94
94
95 fsencode = os.fsencode
95 fsencode = os.fsencode
96 fsdecode = os.fsdecode
96 fsdecode = os.fsdecode
97 oscurdir = os.curdir.encode('ascii')
97 oscurdir = os.curdir.encode('ascii')
98 oslinesep = os.linesep.encode('ascii')
98 oslinesep = os.linesep.encode('ascii')
99 osname = os.name.encode('ascii')
99 osname = os.name.encode('ascii')
100 ospathsep = os.pathsep.encode('ascii')
100 ospathsep = os.pathsep.encode('ascii')
101 ospardir = os.pardir.encode('ascii')
101 ospardir = os.pardir.encode('ascii')
102 ossep = os.sep.encode('ascii')
102 ossep = os.sep.encode('ascii')
103 osaltsep = os.altsep
103 osaltsep = os.altsep
104 if osaltsep:
104 if osaltsep:
105 osaltsep = osaltsep.encode('ascii')
105 osaltsep = osaltsep.encode('ascii')
106
106
107 sysplatform = sys.platform.encode('ascii')
107 sysplatform = sys.platform.encode('ascii')
108 sysexecutable = sys.executable
108 sysexecutable = sys.executable
109 if sysexecutable:
109 if sysexecutable:
110 sysexecutable = os.fsencode(sysexecutable)
110 sysexecutable = os.fsencode(sysexecutable)
111 bytesio = io.BytesIO
111 bytesio = io.BytesIO
112 # TODO deprecate stringio name, as it is a lie on Python 3.
112 # TODO deprecate stringio name, as it is a lie on Python 3.
113 stringio = bytesio
113 stringio = bytesio
114
114
def maplist(*args):
    """map() that always returns a fully materialized list (py2 semantics)."""
    return list(map(*args))


def rangelist(*args):
    """range() that always returns a fully materialized list (py2 semantics)."""
    return list(range(*args))


def ziplist(*args):
    """zip() that always returns a fully materialized list (py2 semantics)."""
    return list(zip(*args))
123
123
124 rawinput = input
124 rawinput = input
125 getargspec = inspect.getfullargspec
125 getargspec = inspect.getfullargspec
126
126
127 long = int
127 long = int
128
128
129 # TODO: .buffer might not exist if std streams were replaced; we'll need
129 # TODO: .buffer might not exist if std streams were replaced; we'll need
130 # a silly wrapper to make a bytes stream backed by a unicode one.
130 # a silly wrapper to make a bytes stream backed by a unicode one.
131 stdin = sys.stdin.buffer
131 stdin = sys.stdin.buffer
132 stdout = sys.stdout.buffer
132 stdout = sys.stdout.buffer
133 stderr = sys.stderr.buffer
133 stderr = sys.stderr.buffer
134
134
135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
136 # we can use os.fsencode() to get back bytes argv.
136 # we can use os.fsencode() to get back bytes argv.
137 #
137 #
138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
139 #
139 #
140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
142 if getattr(sys, 'argv', None) is not None:
142 if getattr(sys, 'argv', None) is not None:
143 sysargv = list(map(os.fsencode, sys.argv))
143 sysargv = list(map(os.fsencode, sys.argv))
144
144
145 bytechr = struct.Struct(r'>B').pack
145 bytechr = struct.Struct(r'>B').pack
146 byterepr = b'%r'.__mod__
146 byterepr = b'%r'.__mod__
147
147
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t) # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # bytes is immutable, so an existing bytestr can be reused as-is.
        if isinstance(s, bytestr):
            return s
        if not isinstance(
            s, (bytes, bytearray)
        ) and not hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            # Fall back to the str() form, ascii-only by design: the
            # encoding of an arbitrary non-ascii str is unknown.
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        # bytes indexing yields an int on Python 3; convert it back to a
        # 1-byte bytes object for Python 2 str semantics.
        item = bytes.__getitem__(self, key)
        if not isinstance(item, bytes):
            item = bytechr(item)
        return item

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        # Drop the leading b'' so reprs read like Python 2 strs.
        return bytes.__repr__(self)[1:]
223
223
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2"""
    # Each int from bytes iteration is mapped back to a 1-byte bytes.
    return map(bytechr, s)
227
227
def maybebytestr(s):
    """Promote bytes to bytestr; pass any other type through unchanged."""
    if not isinstance(s, bytes):
        return s
    return bytestr(s)
233
233
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    This never raises UnicodeEncodeError, but only ASCII characters
    can be round-trip by sysstr(sysbytes(s)).
    """
    return s.encode('utf-8')
241
241
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if isinstance(s, builtins.str):
        # Already a native str; nothing to decode.
        return s
    return s.decode('latin-1')
253
253
def strurl(url):
    """Converts a bytes url back to str"""
    if not isinstance(url, bytes):
        return url
    return url.decode('ascii')
259
259
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii"""
    if not isinstance(url, str):
        return url
    return url.encode('ascii')
265
265
def raisewithtb(exc, tb):
    """Raise exception with the given traceback"""
    raise exc.with_traceback(tb)
269
269
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, u'__doc__', None)
    if doc is None:
        return None
    return sysbytes(doc)
277
277
def _wrapattrfunc(f):
    """Wrap an attribute builtin so it accepts bytes attribute names."""

    @functools.wraps(f)
    def w(object, name, *args):
        # Attribute names must be native str; convert via sysstr().
        return f(object, sysstr(name), *args)

    return w
284
284
285 # these wrappers are automagically imported by hgloader
285 # these wrappers are automagically imported by hgloader
286 delattr = _wrapattrfunc(builtins.delattr)
286 delattr = _wrapattrfunc(builtins.delattr)
287 getattr = _wrapattrfunc(builtins.getattr)
287 getattr = _wrapattrfunc(builtins.getattr)
288 hasattr = _wrapattrfunc(builtins.hasattr)
288 hasattr = _wrapattrfunc(builtins.hasattr)
289 setattr = _wrapattrfunc(builtins.setattr)
289 setattr = _wrapattrfunc(builtins.setattr)
290 xrange = builtins.range
290 xrange = builtins.range
291 unicode = str
291 unicode = str
292
292
def open(name, mode=b'r', buffering=-1, encoding=None):
    """builtins.open() wrapper accepting a bytes mode string."""
    return builtins.open(name, sysstr(mode), buffering, encoding)
295
295
296 safehasattr = _wrapattrfunc(builtins.hasattr)
296 safehasattr = _wrapattrfunc(builtins.hasattr)
297
297
298 def _getoptbwrapper(orig, args, shortlist, namelist):
298 def _getoptbwrapper(orig, args, shortlist, namelist):
299 """
299 """
300 Takes bytes arguments, converts them to unicode, pass them to
300 Takes bytes arguments, converts them to unicode, pass them to
301 getopt.getopt(), convert the returned values back to bytes and then
301 getopt.getopt(), convert the returned values back to bytes and then
302 return them for Python 3 compatibility as getopt.getopt() don't accepts
302 return them for Python 3 compatibility as getopt.getopt() don't accepts
303 bytes on Python 3.
303 bytes on Python 3.
304 """
304 """
305 args = [a.decode('latin-1') for a in args]
305 args = [a.decode('latin-1') for a in args]
306 shortlist = shortlist.decode('latin-1')
306 shortlist = shortlist.decode('latin-1')
307 namelist = [a.decode('latin-1') for a in namelist]
307 namelist = [a.decode('latin-1') for a in namelist]
308 opts, args = orig(args, shortlist, namelist)
308 opts, args = orig(args, shortlist, namelist)
309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
310 args = [a.encode('latin-1') for a in args]
310 args = [a.encode('latin-1') for a in args]
311 return opts, args
311 return opts, args
312
312
def strkwargs(dic):
    """
    Convert the keys of *dic* from bytes to str (unicode) so the dict can
    be passed as **kwargs on Python 3, which rejects bytes keyword names.
    """
    # NOTE: .iteritems() is rewritten to .items() by the hgloader token
    # transform at import time; do not "fix" it here.
    return dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
321
321
def byteskwargs(dic):
    """
    Convert str (unicode) dict keys back to bytes, undoing strkwargs()
    after the dict was passed as **kwargs on Python 3.
    """
    # NOTE: .iteritems() is rewritten to .items() by the hgloader token
    # transform at import time; do not "fix" it here.
    return dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
329
329
330 # TODO: handle shlex.shlex().
330 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Bytes-friendly wrapper around shlex.split(), which only accepts str
    on Python 3: decode the input, split, and re-encode each token.
    """
    words = shlex.split(s.decode('latin-1'), comments, posix)
    return [w.encode('latin-1') for w in words]
339
339
340 shlexquote = shlex.quote
340 shlexquote = shlex.quote
341
341
342 else:
342 else:
343 import cStringIO
343 import cStringIO
344 import pipes
344 import pipes
345
345
346 xrange = xrange
346 xrange = xrange
347 unicode = unicode
347 unicode = unicode
348 bytechr = chr
348 bytechr = chr
349 byterepr = repr
349 byterepr = repr
350 bytestr = str
350 bytestr = str
351 iterbytestr = iter
351 iterbytestr = iter
352 maybebytestr = identity
352 maybebytestr = identity
353 sysbytes = identity
353 sysbytes = identity
354 sysstr = identity
354 sysstr = identity
355 strurl = identity
355 strurl = identity
356 bytesurl = identity
356 bytesurl = identity
357 open = open
357 open = open
358 delattr = delattr
358 delattr = delattr
359 getattr = getattr
359 getattr = getattr
360 hasattr = hasattr
360 hasattr = hasattr
361 setattr = setattr
361 setattr = setattr
362
362
363 # this can't be parsed on Python 3
363 # this can't be parsed on Python 3
364 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
364 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
365
365
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.
    """
    if not isinstance(filename, str):
        raise TypeError(r"expect str, not %s" % type(filename).__name__)
    return filename
376
376
377 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
377 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
378 # better not to touch Python 2 part as it's already working fine.
378 # better not to touch Python 2 part as it's already working fine.
379 fsdecode = identity
379 fsdecode = identity
380
380
def getdoc(obj):
    """Return obj.__doc__, or None when the object has no docstring."""
    return getattr(obj, '__doc__', None)
383
383
# Unique sentinel distinguishing "attribute absent" from any real value.
_notset = object()

def safehasattr(thing, attr):
    # Like hasattr(), implemented via getattr() with a sentinel default.
    return getattr(thing, attr, _notset) is not _notset
388
388
389 def _getoptbwrapper(orig, args, shortlist, namelist):
389 def _getoptbwrapper(orig, args, shortlist, namelist):
390 return orig(args, shortlist, namelist)
390 return orig(args, shortlist, namelist)
391
391
392 strkwargs = identity
392 strkwargs = identity
393 byteskwargs = identity
393 byteskwargs = identity
394
394
395 oscurdir = os.curdir
395 oscurdir = os.curdir
396 oslinesep = os.linesep
396 oslinesep = os.linesep
397 osname = os.name
397 osname = os.name
398 ospathsep = os.pathsep
398 ospathsep = os.pathsep
399 ospardir = os.pardir
399 ospardir = os.pardir
400 ossep = os.sep
400 ossep = os.sep
401 osaltsep = os.altsep
401 osaltsep = os.altsep
402 long = long
402 long = long
403 stdin = sys.stdin
403 stdin = sys.stdin
404 stdout = sys.stdout
404 stdout = sys.stdout
405 stderr = sys.stderr
405 stderr = sys.stderr
406 if getattr(sys, 'argv', None) is not None:
406 if getattr(sys, 'argv', None) is not None:
407 sysargv = sys.argv
407 sysargv = sys.argv
408 sysplatform = sys.platform
408 sysplatform = sys.platform
409 sysexecutable = sys.executable
409 sysexecutable = sys.executable
410 shlexsplit = shlex.split
410 shlexsplit = shlex.split
411 shlexquote = pipes.quote
411 shlexquote = pipes.quote
412 bytesio = cStringIO.StringIO
412 bytesio = cStringIO.StringIO
413 stringio = bytesio
413 stringio = bytesio
414 maplist = map
414 maplist = map
415 rangelist = range
415 rangelist = range
416 ziplist = zip
416 ziplist = zip
417 rawinput = raw_input
417 rawinput = raw_input
418 getargspec = inspect.getargspec
418 getargspec = inspect.getargspec
419
419
420 isjython = sysplatform.startswith(b'java')
420 isjython = sysplatform.startswith(b'java')
421
421
422 isdarwin = sysplatform.startswith(b'darwin')
422 isdarwin = sysplatform.startswith(b'darwin')
423 islinux = sysplatform.startswith(b'linux')
423 islinux = sysplatform.startswith(b'linux')
424 isposix = osname == b'posix'
424 isposix = osname == b'posix'
425 iswindows = osname == b'nt'
425 iswindows = osname == b'nt'
426
426
427
427
def getoptb(args, shortlist, namelist):
    """getopt.getopt() wrapper that accepts bytes arguments on Python 3."""
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
430
430
431
431
def gnugetoptb(args, shortlist, namelist):
    """getopt.gnu_getopt() wrapper that accepts bytes arguments on Python 3."""
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
434
434
435
435
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkdtemp() with bytes defaults for suffix/prefix."""
    return tempfile.mkdtemp(suffix, prefix, dir)
438
438
439
439
440 # text=True is not supported; use util.from/tonativeeol() instead
440 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkstemp() with bytes defaults.

    text=True is not supported; use util.from/tonativeeol() instead.
    """
    return tempfile.mkstemp(suffix, prefix, dir)
443
443
444
444
445 # mode must include 'b'ytes as encoding= is not supported
445 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """tempfile.NamedTemporaryFile() accepting a bytes mode.

    encoding= is not supported, so the mode must include 'b'ytes.
    """
    mode = sysstr(mode)
    assert r'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now