##// END OF EJS Templates
diffhelpers: switch to policy importer...
Yuya Nishihara -
r32370:017ad85e default
parent child Browse files
Show More
@@ -1,104 +1,103
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-py3-compat - check Python 3 compatibility of Mercurial files
4 4 #
5 5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import, print_function
11 11
12 12 import ast
13 13 import importlib
14 14 import os
15 15 import sys
16 16 import traceback
17 17
18 18 # Modules that have both Python and C implementations.
19 19 _dualmodules = (
20 'diffhelpers.py',
21 20 'mpatch.py',
22 21 'parsers.py',
23 22 )
24 23
25 24 def check_compat_py2(f):
26 25 """Check Python 3 compatibility for a file with Python 2"""
27 26 with open(f, 'rb') as fh:
28 27 content = fh.read()
29 28 root = ast.parse(content)
30 29
31 30 # Ignore empty files.
32 31 if not root.body:
33 32 return
34 33
35 34 futures = set()
36 35 haveprint = False
37 36 for node in ast.walk(root):
38 37 if isinstance(node, ast.ImportFrom):
39 38 if node.module == '__future__':
40 39 futures |= set(n.name for n in node.names)
41 40 elif isinstance(node, ast.Print):
42 41 haveprint = True
43 42
44 43 if 'absolute_import' not in futures:
45 44 print('%s not using absolute_import' % f)
46 45 if haveprint and 'print_function' not in futures:
47 46 print('%s requires print_function' % f)
48 47
49 48 def check_compat_py3(f):
50 49 """Check Python 3 compatibility of a file with Python 3."""
51 50 with open(f, 'rb') as fh:
52 51 content = fh.read()
53 52
54 53 try:
55 54 ast.parse(content)
56 55 except SyntaxError as e:
57 56 print('%s: invalid syntax: %s' % (f, e))
58 57 return
59 58
60 59 # Try to import the module.
61 60 # For now we only support mercurial.* and hgext.* modules because figuring
62 61 # out module paths for things not in a package can be confusing.
63 62 if f.startswith(('hgext/', 'mercurial/')) and not f.endswith('__init__.py'):
64 63 assert f.endswith('.py')
65 64 name = f.replace('/', '.')[:-3]
66 65 if f.endswith(_dualmodules):
67 66 name = name.replace('.pure.', '.')
68 67 try:
69 68 importlib.import_module(name)
70 69 except Exception as e:
71 70 exc_type, exc_value, tb = sys.exc_info()
72 71 # We walk the stack and ignore frames from our custom importer,
73 72 # import mechanisms, and stdlib modules. This kinda/sorta
74 73 # emulates CPython behavior in import.c while also attempting
75 74 # to pin blame on a Mercurial file.
76 75 for frame in reversed(traceback.extract_tb(tb)):
77 76 if frame.name == '_call_with_frames_removed':
78 77 continue
79 78 if 'importlib' in frame.filename:
80 79 continue
81 80 if 'mercurial/__init__.py' in frame.filename:
82 81 continue
83 82 if frame.filename.startswith(sys.prefix):
84 83 continue
85 84 break
86 85
87 86 if frame.filename:
88 87 filename = os.path.basename(frame.filename)
89 88 print('%s: error importing: <%s> %s (error at %s:%d)' % (
90 89 f, type(e).__name__, e, filename, frame.lineno))
91 90 else:
92 91 print('%s: error importing module: <%s> %s (line %d)' % (
93 92 f, type(e).__name__, e, frame.lineno))
94 93
95 94 if __name__ == '__main__':
96 95 if sys.version_info[0] == 2:
97 96 fn = check_compat_py2
98 97 else:
99 98 fn = check_compat_py3
100 99
101 100 for f in sys.argv[1:]:
102 101 fn(f)
103 102
104 103 sys.exit(0)
@@ -1,733 +1,732
1 1 #!/usr/bin/env python
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import os
8 8 import re
9 9 import sys
10 10
11 11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
12 12 # to work when run from a virtualenv. The modules were chosen empirically
13 13 # so that the return value matches the return value without virtualenv.
14 14 if True: # disable lexical sorting checks
15 15 import BaseHTTPServer
16 16 import zlib
17 17
18 18 # Whitelist of modules that symbols can be directly imported from.
19 19 allowsymbolimports = (
20 20 '__future__',
21 21 'mercurial.hgweb.common',
22 22 'mercurial.hgweb.request',
23 23 'mercurial.i18n',
24 24 'mercurial.node',
25 25 )
26 26
27 27 # Modules that have both Python and C implementations.
28 28 _dualmodules = (
29 'diffhelpers.py',
30 29 'mpatch.py',
31 30 'parsers.py',
32 31 )
33 32
34 33 # Modules that must be aliased because they are commonly confused with
35 34 # common variables and can create aliasing and readability issues.
36 35 requirealias = {
37 36 'ui': 'uimod',
38 37 }
39 38
40 39 def usingabsolute(root):
41 40 """Whether absolute imports are being used."""
42 41 if sys.version_info[0] >= 3:
43 42 return True
44 43
45 44 for node in ast.walk(root):
46 45 if isinstance(node, ast.ImportFrom):
47 46 if node.module == '__future__':
48 47 for n in node.names:
49 48 if n.name == 'absolute_import':
50 49 return True
51 50
52 51 return False
53 52
54 53 def walklocal(root):
55 54 """Recursively yield all descendant nodes but not in a different scope"""
56 55 todo = collections.deque(ast.iter_child_nodes(root))
57 56 yield root, False
58 57 while todo:
59 58 node = todo.popleft()
60 59 newscope = isinstance(node, ast.FunctionDef)
61 60 if not newscope:
62 61 todo.extend(ast.iter_child_nodes(node))
63 62 yield node, newscope
64 63
65 64 def dotted_name_of_path(path, trimpure=False):
66 65 """Given a relative path to a source file, return its dotted module name.
67 66
68 67 >>> dotted_name_of_path('mercurial/error.py')
69 68 'mercurial.error'
70 69 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
71 70 'mercurial.parsers'
72 71 >>> dotted_name_of_path('zlibmodule.so')
73 72 'zlib'
74 73 """
75 74 parts = path.replace(os.sep, '/').split('/')
76 75 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
77 76 if parts[-1].endswith('module'):
78 77 parts[-1] = parts[-1][:-6]
79 78 if trimpure:
80 79 return '.'.join(p for p in parts if p != 'pure')
81 80 return '.'.join(parts)
82 81
83 82 def fromlocalfunc(modulename, localmods):
84 83 """Get a function to examine which locally defined module the
85 84 target source imports via a specified name.
86 85
87 86 `modulename` is a `dotted_name_of_path()`-ed source file path,
88 87 which may have `.__init__` at the end of it, of the target source.
89 88
90 89 `localmods` is a dict (or set) whose keys are absolute
91 90 `dotted_name_of_path()`-ed source file paths of locally defined (=
92 91 Mercurial specific) modules.
93 92
94 93 This function assumes that module names not existing in
95 94 `localmods` are from the Python standard library.
96 95
97 96 This function returns the function, which takes `name` argument,
98 97 and returns `(absname, dottedpath, hassubmod)` tuple if `name`
99 98 matches against locally defined module. Otherwise, it returns
100 99 False.
101 100
102 101 It is assumed that `name` doesn't have `.__init__`.
103 102
104 103 `absname` is an absolute module name of specified `name`
105 104 (e.g. "hgext.convert"). This can be used to compose prefix for sub
106 105 modules or so.
107 106
108 107 `dottedpath` is a `dotted_name_of_path()`-ed source file path
109 108 (e.g. "hgext.convert.__init__") of `name`. This is used to look
110 109 module up in `localmods` again.
111 110
112 111 `hassubmod` is whether it may have sub modules under it (for
113 112 convenient, even though this is also equivalent to "absname !=
114 113 dottednpath")
115 114
116 115 >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
117 116 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
118 117 ... 'baz.__init__': True, 'baz.baz1': True }
119 118 >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
120 119 >>> # relative
121 120 >>> fromlocal('foo1')
122 121 ('foo.foo1', 'foo.foo1', False)
123 122 >>> fromlocal('bar')
124 123 ('foo.bar', 'foo.bar.__init__', True)
125 124 >>> fromlocal('bar.bar1')
126 125 ('foo.bar.bar1', 'foo.bar.bar1', False)
127 126 >>> # absolute
128 127 >>> fromlocal('baz')
129 128 ('baz', 'baz.__init__', True)
130 129 >>> fromlocal('baz.baz1')
131 130 ('baz.baz1', 'baz.baz1', False)
132 131 >>> # unknown = maybe standard library
133 132 >>> fromlocal('os')
134 133 False
135 134 >>> fromlocal(None, 1)
136 135 ('foo', 'foo.__init__', True)
137 136 >>> fromlocal('foo1', 1)
138 137 ('foo.foo1', 'foo.foo1', False)
139 138 >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
140 139 >>> fromlocal2(None, 2)
141 140 ('foo', 'foo.__init__', True)
142 141 >>> fromlocal2('bar2', 1)
143 142 False
144 143 >>> fromlocal2('bar', 2)
145 144 ('foo.bar', 'foo.bar.__init__', True)
146 145 """
147 146 prefix = '.'.join(modulename.split('.')[:-1])
148 147 if prefix:
149 148 prefix += '.'
150 149 def fromlocal(name, level=0):
151 150 # name is a false value when relative imports are used.
152 151 if not name:
153 152 # If relative imports are used, level must be greater than 0.
154 153 assert level > 0
155 154 candidates = ['.'.join(modulename.split('.')[:-level])]
156 155 else:
157 156 if not level:
158 157 # Check relative name first.
159 158 candidates = [prefix + name, name]
160 159 else:
161 160 candidates = ['.'.join(modulename.split('.')[:-level]) +
162 161 '.' + name]
163 162
164 163 for n in candidates:
165 164 if n in localmods:
166 165 return (n, n, False)
167 166 dottedpath = n + '.__init__'
168 167 if dottedpath in localmods:
169 168 return (n, dottedpath, True)
170 169 return False
171 170 return fromlocal
172 171
173 172 def list_stdlib_modules():
174 173 """List the modules present in the stdlib.
175 174
176 175 >>> mods = set(list_stdlib_modules())
177 176 >>> 'BaseHTTPServer' in mods
178 177 True
179 178
180 179 os.path isn't really a module, so it's missing:
181 180
182 181 >>> 'os.path' in mods
183 182 False
184 183
185 184 sys requires special treatment, because it's baked into the
186 185 interpreter, but it should still appear:
187 186
188 187 >>> 'sys' in mods
189 188 True
190 189
191 190 >>> 'collections' in mods
192 191 True
193 192
194 193 >>> 'cStringIO' in mods
195 194 True
196 195
197 196 >>> 'cffi' in mods
198 197 True
199 198 """
200 199 for m in sys.builtin_module_names:
201 200 yield m
202 201 # These modules only exist on windows, but we should always
203 202 # consider them stdlib.
204 203 for m in ['msvcrt', '_winreg']:
205 204 yield m
206 205 yield 'builtins' # python3 only
207 206 for m in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
208 207 yield m
209 208 for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
210 209 yield m
211 210 for m in ['cffi']:
212 211 yield m
213 212 stdlib_prefixes = {sys.prefix, sys.exec_prefix}
214 213 # We need to supplement the list of prefixes for the search to work
215 214 # when run from within a virtualenv.
216 215 for mod in (BaseHTTPServer, zlib):
217 216 try:
218 217 # Not all module objects have a __file__ attribute.
219 218 filename = mod.__file__
220 219 except AttributeError:
221 220 continue
222 221 dirname = os.path.dirname(filename)
223 222 for prefix in stdlib_prefixes:
224 223 if dirname.startswith(prefix):
225 224 # Then this directory is redundant.
226 225 break
227 226 else:
228 227 stdlib_prefixes.add(dirname)
229 228 for libpath in sys.path:
230 229 # We want to walk everything in sys.path that starts with
231 230 # something in stdlib_prefixes.
232 231 if not any(libpath.startswith(p) for p in stdlib_prefixes):
233 232 continue
234 233 for top, dirs, files in os.walk(libpath):
235 234 for i, d in reversed(list(enumerate(dirs))):
236 235 if (not os.path.exists(os.path.join(top, d, '__init__.py'))
237 236 or top == libpath and d in ('hgext', 'mercurial')):
238 237 del dirs[i]
239 238 for name in files:
240 239 if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
241 240 continue
242 241 if name.startswith('__init__.py'):
243 242 full_path = top
244 243 else:
245 244 full_path = os.path.join(top, name)
246 245 rel_path = full_path[len(libpath) + 1:]
247 246 mod = dotted_name_of_path(rel_path)
248 247 yield mod
249 248
250 249 stdlib_modules = set(list_stdlib_modules())
251 250
252 251 def imported_modules(source, modulename, f, localmods, ignore_nested=False):
253 252 """Given the source of a file as a string, yield the names
254 253 imported by that file.
255 254
256 255 Args:
257 256 source: The python source to examine as a string.
258 257 modulename: of specified python source (may have `__init__`)
259 258 localmods: dict of locally defined module names (may have `__init__`)
260 259 ignore_nested: If true, import statements that do not start in
261 260 column zero will be ignored.
262 261
263 262 Yields:
264 263 The absolute module names imported by the given source.
265 264
266 265 >>> f = 'foo/xxx.py'
267 266 >>> modulename = 'foo.xxx'
268 267 >>> localmods = {'foo.__init__': True,
269 268 ... 'foo.foo1': True, 'foo.foo2': True,
270 269 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
271 270 ... 'baz.__init__': True, 'baz.baz1': True }
272 271 >>> # standard library (= not locally defined ones)
273 272 >>> sorted(imported_modules(
274 273 ... 'from stdlib1 import foo, bar; import stdlib2',
275 274 ... modulename, f, localmods))
276 275 []
277 276 >>> # relative importing
278 277 >>> sorted(imported_modules(
279 278 ... 'import foo1; from bar import bar1',
280 279 ... modulename, f, localmods))
281 280 ['foo.bar.bar1', 'foo.foo1']
282 281 >>> sorted(imported_modules(
283 282 ... 'from bar.bar1 import name1, name2, name3',
284 283 ... modulename, f, localmods))
285 284 ['foo.bar.bar1']
286 285 >>> # absolute importing
287 286 >>> sorted(imported_modules(
288 287 ... 'from baz import baz1, name1',
289 288 ... modulename, f, localmods))
290 289 ['baz.__init__', 'baz.baz1']
291 290 >>> # mixed importing, even though it shouldn't be recommended
292 291 >>> sorted(imported_modules(
293 292 ... 'import stdlib, foo1, baz',
294 293 ... modulename, f, localmods))
295 294 ['baz.__init__', 'foo.foo1']
296 295 >>> # ignore_nested
297 296 >>> sorted(imported_modules(
298 297 ... '''import foo
299 298 ... def wat():
300 299 ... import bar
301 300 ... ''', modulename, f, localmods))
302 301 ['foo.__init__', 'foo.bar.__init__']
303 302 >>> sorted(imported_modules(
304 303 ... '''import foo
305 304 ... def wat():
306 305 ... import bar
307 306 ... ''', modulename, f, localmods, ignore_nested=True))
308 307 ['foo.__init__']
309 308 """
310 309 fromlocal = fromlocalfunc(modulename, localmods)
311 310 for node in ast.walk(ast.parse(source, f)):
312 311 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
313 312 continue
314 313 if isinstance(node, ast.Import):
315 314 for n in node.names:
316 315 found = fromlocal(n.name)
317 316 if not found:
318 317 # this should import standard library
319 318 continue
320 319 yield found[1]
321 320 elif isinstance(node, ast.ImportFrom):
322 321 found = fromlocal(node.module, node.level)
323 322 if not found:
324 323 # this should import standard library
325 324 continue
326 325
327 326 absname, dottedpath, hassubmod = found
328 327 if not hassubmod:
329 328 # "dottedpath" is not a package; must be imported
330 329 yield dottedpath
331 330 # examination of "node.names" should be redundant
332 331 # e.g.: from mercurial.node import nullid, nullrev
333 332 continue
334 333
335 334 modnotfound = False
336 335 prefix = absname + '.'
337 336 for n in node.names:
338 337 found = fromlocal(prefix + n.name)
339 338 if not found:
340 339 # this should be a function or a property of "node.module"
341 340 modnotfound = True
342 341 continue
343 342 yield found[1]
344 343 if modnotfound:
345 344 # "dottedpath" is a package, but imported because of non-module
346 345 # lookup
347 346 yield dottedpath
348 347
349 348 def verify_import_convention(module, source, localmods):
350 349 """Verify imports match our established coding convention.
351 350
352 351 We have 2 conventions: legacy and modern. The modern convention is in
353 352 effect when using absolute imports.
354 353
355 354 The legacy convention only looks for mixed imports. The modern convention
356 355 is much more thorough.
357 356 """
358 357 root = ast.parse(source)
359 358 absolute = usingabsolute(root)
360 359
361 360 if absolute:
362 361 return verify_modern_convention(module, root, localmods)
363 362 else:
364 363 return verify_stdlib_on_own_line(root)
365 364
366 365 def verify_modern_convention(module, root, localmods, root_col_offset=0):
367 366 """Verify a file conforms to the modern import convention rules.
368 367
369 368 The rules of the modern convention are:
370 369
371 370 * Ordering is stdlib followed by local imports. Each group is lexically
372 371 sorted.
373 372 * Importing multiple modules via "import X, Y" is not allowed: use
374 373 separate import statements.
375 374 * Importing multiple modules via "from X import ..." is allowed if using
376 375 parentheses and one entry per line.
377 376 * Only 1 relative import statement per import level ("from .", "from ..")
378 377 is allowed.
379 378 * Relative imports from higher levels must occur before lower levels. e.g.
380 379 "from .." must be before "from .".
381 380 * Imports from peer packages should use relative import (e.g. do not
382 381 "import mercurial.foo" from a "mercurial.*" module).
383 382 * Symbols can only be imported from specific modules (see
384 383 `allowsymbolimports`). For other modules, first import the module then
385 384 assign the symbol to a module-level variable. In addition, these imports
386 385 must be performed before other local imports. This rule only
387 386 applies to import statements outside of any blocks.
388 387 * Relative imports from the standard library are not allowed.
389 388 * Certain modules must be aliased to alternate names to avoid aliasing
390 389 and readability problems. See `requirealias`.
391 390 """
392 391 topmodule = module.split('.')[0]
393 392 fromlocal = fromlocalfunc(module, localmods)
394 393
395 394 # Whether a local/non-stdlib import has been performed.
396 395 seenlocal = None
397 396 # Whether a local/non-stdlib, non-symbol import has been seen.
398 397 seennonsymbollocal = False
399 398 # The last name to be imported (for sorting).
400 399 lastname = None
401 400 laststdlib = None
402 401 # Relative import levels encountered so far.
403 402 seenlevels = set()
404 403
405 404 for node, newscope in walklocal(root):
406 405 def msg(fmt, *args):
407 406 return (fmt % args, node.lineno)
408 407 if newscope:
409 408 # Check for local imports in function
410 409 for r in verify_modern_convention(module, node, localmods,
411 410 node.col_offset + 4):
412 411 yield r
413 412 elif isinstance(node, ast.Import):
414 413 # Disallow "import foo, bar" and require separate imports
415 414 # for each module.
416 415 if len(node.names) > 1:
417 416 yield msg('multiple imported names: %s',
418 417 ', '.join(n.name for n in node.names))
419 418
420 419 name = node.names[0].name
421 420 asname = node.names[0].asname
422 421
423 422 stdlib = name in stdlib_modules
424 423
425 424 # Ignore sorting rules on imports inside blocks.
426 425 if node.col_offset == root_col_offset:
427 426 if lastname and name < lastname and laststdlib == stdlib:
428 427 yield msg('imports not lexically sorted: %s < %s',
429 428 name, lastname)
430 429
431 430 lastname = name
432 431 laststdlib = stdlib
433 432
434 433 # stdlib imports should be before local imports.
435 434 if stdlib and seenlocal and node.col_offset == root_col_offset:
436 435 yield msg('stdlib import "%s" follows local import: %s',
437 436 name, seenlocal)
438 437
439 438 if not stdlib:
440 439 seenlocal = name
441 440
442 441 # Import of sibling modules should use relative imports.
443 442 topname = name.split('.')[0]
444 443 if topname == topmodule:
445 444 yield msg('import should be relative: %s', name)
446 445
447 446 if name in requirealias and asname != requirealias[name]:
448 447 yield msg('%s module must be "as" aliased to %s',
449 448 name, requirealias[name])
450 449
451 450 elif isinstance(node, ast.ImportFrom):
452 451 # Resolve the full imported module name.
453 452 if node.level > 0:
454 453 fullname = '.'.join(module.split('.')[:-node.level])
455 454 if node.module:
456 455 fullname += '.%s' % node.module
457 456 else:
458 457 assert node.module
459 458 fullname = node.module
460 459
461 460 topname = fullname.split('.')[0]
462 461 if topname == topmodule:
463 462 yield msg('import should be relative: %s', fullname)
464 463
465 464 # __future__ is special since it needs to come first and use
466 465 # symbol import.
467 466 if fullname != '__future__':
468 467 if not fullname or fullname in stdlib_modules:
469 468 yield msg('relative import of stdlib module')
470 469 else:
471 470 seenlocal = fullname
472 471
473 472 # Direct symbol import is only allowed from certain modules and
474 473 # must occur before non-symbol imports.
475 474 found = fromlocal(node.module, node.level)
476 475 if found and found[2]: # node.module is a package
477 476 prefix = found[0] + '.'
478 477 symbols = [n.name for n in node.names
479 478 if not fromlocal(prefix + n.name)]
480 479 else:
481 480 symbols = [n.name for n in node.names]
482 481 if node.module and node.col_offset == root_col_offset:
483 482 if symbols and fullname not in allowsymbolimports:
484 483 yield msg('direct symbol import %s from %s',
485 484 ', '.join(symbols), fullname)
486 485
487 486 if symbols and seennonsymbollocal:
488 487 yield msg('symbol import follows non-symbol import: %s',
489 488 fullname)
490 489 if not symbols and fullname not in stdlib_modules:
491 490 seennonsymbollocal = True
492 491
493 492 if not node.module:
494 493 assert node.level
495 494
496 495 # Only allow 1 group per level.
497 496 if (node.level in seenlevels
498 497 and node.col_offset == root_col_offset):
499 498 yield msg('multiple "from %s import" statements',
500 499 '.' * node.level)
501 500
502 501 # Higher-level groups come before lower-level groups.
503 502 if any(node.level > l for l in seenlevels):
504 503 yield msg('higher-level import should come first: %s',
505 504 fullname)
506 505
507 506 seenlevels.add(node.level)
508 507
509 508 # Entries in "from .X import ( ... )" lists must be lexically
510 509 # sorted.
511 510 lastentryname = None
512 511
513 512 for n in node.names:
514 513 if lastentryname and n.name < lastentryname:
515 514 yield msg('imports from %s not lexically sorted: %s < %s',
516 515 fullname, n.name, lastentryname)
517 516
518 517 lastentryname = n.name
519 518
520 519 if n.name in requirealias and n.asname != requirealias[n.name]:
521 520 yield msg('%s from %s must be "as" aliased to %s',
522 521 n.name, fullname, requirealias[n.name])
523 522
524 523 def verify_stdlib_on_own_line(root):
525 524 """Given some python source, verify that stdlib imports are done
526 525 in separate statements from relative local module imports.
527 526
528 527 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
529 528 [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
530 529 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
531 530 []
532 531 >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
533 532 []
534 533 """
535 534 for node in ast.walk(root):
536 535 if isinstance(node, ast.Import):
537 536 from_stdlib = {False: [], True: []}
538 537 for n in node.names:
539 538 from_stdlib[n.name in stdlib_modules].append(n.name)
540 539 if from_stdlib[True] and from_stdlib[False]:
541 540 yield ('mixed imports\n stdlib: %s\n relative: %s' %
542 541 (', '.join(sorted(from_stdlib[True])),
543 542 ', '.join(sorted(from_stdlib[False]))), node.lineno)
544 543
545 544 class CircularImport(Exception):
546 545 pass
547 546
548 547 def checkmod(mod, imports):
549 548 shortest = {}
550 549 visit = [[mod]]
551 550 while visit:
552 551 path = visit.pop(0)
553 552 for i in sorted(imports.get(path[-1], [])):
554 553 if len(path) < shortest.get(i, 1000):
555 554 shortest[i] = len(path)
556 555 if i in path:
557 556 if i == path[0]:
558 557 raise CircularImport(path)
559 558 continue
560 559 visit.append(path + [i])
561 560
562 561 def rotatecycle(cycle):
563 562 """arrange a cycle so that the lexicographically first module listed first
564 563
565 564 >>> rotatecycle(['foo', 'bar'])
566 565 ['bar', 'foo', 'bar']
567 566 """
568 567 lowest = min(cycle)
569 568 idx = cycle.index(lowest)
570 569 return cycle[idx:] + cycle[:idx] + [lowest]
571 570
572 571 def find_cycles(imports):
573 572 """Find cycles in an already-loaded import graph.
574 573
575 574 All module names recorded in `imports` should be absolute one.
576 575
577 576 >>> from __future__ import print_function
578 577 >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
579 578 ... 'top.bar': ['top.baz', 'sys'],
580 579 ... 'top.baz': ['top.foo'],
581 580 ... 'top.qux': ['top.foo']}
582 581 >>> print('\\n'.join(sorted(find_cycles(imports))))
583 582 top.bar -> top.baz -> top.foo -> top.bar
584 583 top.foo -> top.qux -> top.foo
585 584 """
586 585 cycles = set()
587 586 for mod in sorted(imports.keys()):
588 587 try:
589 588 checkmod(mod, imports)
590 589 except CircularImport as e:
591 590 cycle = e.args[0]
592 591 cycles.add(" -> ".join(rotatecycle(cycle)))
593 592 return cycles
594 593
595 594 def _cycle_sortkey(c):
596 595 return len(c), c
597 596
598 597 def embedded(f, modname, src):
599 598 """Extract embedded python code
600 599
601 600 >>> def test(fn, lines):
602 601 ... for s, m, f, l in embedded(fn, "example", lines):
603 602 ... print("%s %s %s" % (m, f, l))
604 603 ... print(repr(s))
605 604 >>> lines = [
606 605 ... 'comment',
607 606 ... ' >>> from __future__ import print_function',
608 607 ... " >>> ' multiline",
609 608 ... " ... string'",
610 609 ... ' ',
611 610 ... 'comment',
612 611 ... ' $ cat > foo.py <<EOF',
613 612 ... ' > from __future__ import print_function',
614 613 ... ' > EOF',
615 614 ... ]
616 615 >>> test("example.t", lines)
617 616 example[2] doctest.py 2
618 617 "from __future__ import print_function\\n' multiline\\nstring'\\n"
619 618 example[7] foo.py 7
620 619 'from __future__ import print_function\\n'
621 620 """
622 621 inlinepython = 0
623 622 shpython = 0
624 623 script = []
625 624 prefix = 6
626 625 t = ''
627 626 n = 0
628 627 for l in src:
629 628 n += 1
630 629 if not l.endswith(b'\n'):
631 630 l += b'\n'
632 631 if l.startswith(b' >>> '): # python inlines
633 632 if shpython:
634 633 print("%s:%d: Parse Error" % (f, n))
635 634 if not inlinepython:
636 635 # We've just entered a Python block.
637 636 inlinepython = n
638 637 t = 'doctest.py'
639 638 script.append(l[prefix:])
640 639 continue
641 640 if l.startswith(b' ... '): # python inlines
642 641 script.append(l[prefix:])
643 642 continue
644 643 cat = re.search(r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
645 644 if cat:
646 645 if inlinepython:
647 646 yield ''.join(script), ("%s[%d]" %
648 647 (modname, inlinepython)), t, inlinepython
649 648 script = []
650 649 inlinepython = 0
651 650 shpython = n
652 651 t = cat.group(1)
653 652 continue
654 653 if shpython and l.startswith(b' > '): # sh continuation
655 654 if l == b' > EOF\n':
656 655 yield ''.join(script), ("%s[%d]" %
657 656 (modname, shpython)), t, shpython
658 657 script = []
659 658 shpython = 0
660 659 else:
661 660 script.append(l[4:])
662 661 continue
663 662 if inlinepython and l == b' \n':
664 663 yield ''.join(script), ("%s[%d]" %
665 664 (modname, inlinepython)), t, inlinepython
666 665 script = []
667 666 inlinepython = 0
668 667 continue
669 668
670 669 def sources(f, modname):
671 670 """Yields possibly multiple sources from a filepath
672 671
673 672 input: filepath, modulename
674 673 yields: script(string), modulename, filepath, linenumber
675 674
676 675 For embedded scripts, the modulename and filepath will be different
677 676 from the function arguments. linenumber is an offset relative to
678 677 the input file.
679 678 """
680 679 py = False
681 680 if not f.endswith('.t'):
682 681 with open(f) as src:
683 682 yield src.read(), modname, f, 0
684 683 py = True
685 684 if py or f.endswith('.t'):
686 685 with open(f) as src:
687 686 for script, modname, t, line in embedded(f, modname, src):
688 687 yield script, modname, t, line
689 688
690 689 def main(argv):
691 690 if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
692 691 print('Usage: %s {-|file [file] [file] ...}')
693 692 return 1
694 693 if argv[1] == '-':
695 694 argv = argv[:1]
696 695 argv.extend(l.rstrip() for l in sys.stdin.readlines())
697 696 localmods = {}
698 697 used_imports = {}
699 698 any_errors = False
700 699 for source_path in argv[1:]:
701 700 trimpure = source_path.endswith(_dualmodules)
702 701 modname = dotted_name_of_path(source_path, trimpure=trimpure)
703 702 localmods[modname] = source_path
704 703 for localmodname, source_path in sorted(localmods.items()):
705 704 for src, modname, name, line in sources(source_path, localmodname):
706 705 try:
707 706 used_imports[modname] = sorted(
708 707 imported_modules(src, modname, name, localmods,
709 708 ignore_nested=True))
710 709 for error, lineno in verify_import_convention(modname, src,
711 710 localmods):
712 711 any_errors = True
713 712 print('%s:%d: %s' % (source_path, lineno + line, error))
714 713 except SyntaxError as e:
715 714 print('%s:%d: SyntaxError: %s' %
716 715 (source_path, e.lineno + line, e))
717 716 cycles = find_cycles(used_imports)
718 717 if cycles:
719 718 firstmods = set()
720 719 for c in sorted(cycles, key=_cycle_sortkey):
721 720 first = c.split()[0]
722 721 # As a rough cut, ignore any cycle that starts with the
723 722 # same module as some other cycle. Otherwise we see lots
724 723 # of cycles that are effectively duplicates.
725 724 if first in firstmods:
726 725 continue
727 726 print('Import cycle:', c)
728 727 firstmods.add(first)
729 728 any_errors = True
730 729 return any_errors != 0
731 730
732 731 if __name__ == '__main__':
733 732 sys.exit(int(main(sys.argv)))
@@ -1,35 +1,35
1 1 <?xml version="1.0" encoding="utf-8"?>
2 2 <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
3 3
4 4 <?include guids.wxi ?>
5 5 <?include defines.wxi ?>
6 6
7 7 <Fragment>
8 8 <DirectoryRef Id="INSTALLDIR" FileSource="$(var.SourceDir)">
9 9 <Component Id="distOutput" Guid="$(var.dist.guid)" Win64='$(var.IsX64)'>
10 10 <File Name="python27.dll" KeyPath="yes" />
11 11 </Component>
12 12 <Directory Id="libdir" Name="lib" FileSource="$(var.SourceDir)/lib">
13 13 <Component Id="libOutput" Guid="$(var.lib.guid)" Win64='$(var.IsX64)'>
14 14 <File Name="library.zip" KeyPath="yes" />
15 15 <File Name="mercurial.cext.base85.pyd" />
16 16 <File Name="mercurial.cext.bdiff.pyd" />
17 <File Name="mercurial.diffhelpers.pyd" />
17 <File Name="mercurial.cext.diffhelpers.pyd" />
18 18 <File Name="mercurial.mpatch.pyd" />
19 19 <File Name="mercurial.cext.osutil.pyd" />
20 20 <File Name="mercurial.parsers.pyd" />
21 21 <File Name="pyexpat.pyd" />
22 22 <File Name="bz2.pyd" />
23 23 <File Name="select.pyd" />
24 24 <File Name="unicodedata.pyd" />
25 25 <File Name="_ctypes.pyd" />
26 26 <File Name="_elementtree.pyd" />
27 27 <File Name="_hashlib.pyd" />
28 28 <File Name="_socket.pyd" />
29 29 <File Name="_ssl.pyd" />
30 30 </Component>
31 31 </Directory>
32 32 </DirectoryRef>
33 33 </Fragment>
34 34
35 35 </Wix>
@@ -1,401 +1,400
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import imp
11 11 import os
12 12 import sys
13 13 import zipimport
14 14
15 15 from . import (
16 16 policy
17 17 )
18 18
__all__ = []

# Effective module-loading policy resolved by mercurial.policy; b'c'
# forces the C extensions (see the importers below).
modulepolicy = policy.policy
22 22
# Modules that have both Python and C implementations. See also the
# set of .py files under mercurial/pure/. Consulted by the import
# hooks below to decide which implementation to load.
_dualmodules = {
    'mercurial.mpatch',
    'mercurial.parsers',
}
30 29
class hgimporter(object):
    """Object that conforms to import hook interface defined in PEP-302."""
    def find_module(self, name, path=None):
        """Return self as the loader for dual C/pure modules, else None."""
        # We only care about modules that have both C and pure implementations.
        if name in _dualmodules:
            return self
        return None

    def load_module(self, name):
        """Load *name*, preferring the C extension per the module policy.

        Falls back to the pure Python implementation under mercurial/pure
        unless the policy demands C (modulepolicy == b'c').
        """
        mod = sys.modules.get(name, None)
        if mod:
            return mod

        mercurial = sys.modules['mercurial']

        # The zip importer behaves sufficiently differently from the default
        # importer to warrant its own code path.
        loader = getattr(mercurial, '__loader__', None)
        if isinstance(loader, zipimport.zipimporter):
            def ziploader(*paths):
                """Obtain a zipimporter for a directory under the main zip."""
                path = os.path.join(loader.archive, *paths)
                zl = sys.path_importer_cache.get(path)
                if not zl:
                    zl = zipimport.zipimporter(path)
                return zl

            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                zl = ziploader('mercurial')
                mod = zl.load_module(name)
                # Unlike imp, ziploader doesn't expose module metadata that
                # indicates the type of module. So just assume what we found
                # is OK (even though it could be a pure Python module).
            except ImportError:
                if modulepolicy == b'c':
                    raise
                zl = ziploader('mercurial', 'pure')
                mod = zl.load_module(name)

            sys.modules[name] = mod
            return mod

        # Unlike the default importer which searches special locations and
        # sys.path, we only look in the directory where "mercurial" was
        # imported from.

        # imp.find_module doesn't support submodules (modules with ".").
        # Instead you have to pass the parent package's __path__ attribute
        # as the path argument.
        stem = name.split('.')[-1]

        try:
            if modulepolicy in policy.policynoc:
                raise ImportError()

            modinfo = imp.find_module(stem, mercurial.__path__)

            # The Mercurial installer used to copy files from
            # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
            # for some installations to have .py files under mercurial/*.
            # Loading Python modules when we expected C versions could result
            # in a) poor performance b) loading a version from a previous
            # Mercurial version, potentially leading to incompatibility. Either
            # scenario is bad. So we verify that modules loaded from
            # mercurial/* are C extensions. If the current policy allows the
            # loading of .py modules, the module will be re-imported from
            # mercurial/pure/* below.
            if modinfo[2][2] != imp.C_EXTENSION:
                raise ImportError('.py version of %s found where C '
                                  'version should exist' % name)

        except ImportError:
            if modulepolicy == b'c':
                raise

            # Could not load the C extension and pure Python is allowed. So
            # try to load them.
            from . import pure
            modinfo = imp.find_module(stem, pure.__path__)
            if not modinfo:
                raise ImportError('could not find mercurial module %s' %
                                  name)

        mod = imp.load_module(name, *modinfo)
        sys.modules[name] = mod
        return mod
120 119
121 120 # Python 3 uses a custom module loader that transforms source code between
122 121 # source file reading and compilation. This is done by registering a custom
123 122 # finder that changes the spec for Mercurial modules to use a custom loader.
124 123 if sys.version_info[0] >= 3:
125 124 from . import pure
126 125 import importlib
127 126 import io
128 127 import token
129 128 import tokenize
130 129
    class hgpathentryfinder(importlib.abc.MetaPathFinder):
        """A sys.meta_path finder that uses a custom module loader."""
        def find_spec(self, fullname, path, target=None):
            """Return a spec with our transforming hgloader attached.

            Dual C/pure modules are redirected to their pure counterpart
            under mercurial/pure before delegating to the other finders.
            """
            # Only handle Mercurial-related modules.
            if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
                return None
            # zstd is already dual-version clean, don't try and mangle it
            if fullname.startswith('mercurial.zstd'):
                return None

            # This assumes Python 3 doesn't support loading C modules.
            if fullname in _dualmodules:
                stem = fullname.split('.')[-1]
                fullname = 'mercurial.pure.%s' % stem
                target = pure
                assert len(path) == 1
                path = [os.path.join(path[0], 'pure')]

            # Try to find the module using other registered finders.
            spec = None
            for finder in sys.meta_path:
                if finder == self:
                    continue

                spec = finder.find_spec(fullname, path, target=target)
                if spec:
                    break

            # This is a Mercurial-related module but we couldn't find it
            # using the previously-registered finders. This likely means
            # the module doesn't exist.
            if not spec:
                return None

            # Restore the public (non-pure) name so sys.modules is keyed
            # the way callers expect.
            if (fullname.startswith('mercurial.pure.')
                and fullname.replace('.pure.', '.') in _dualmodules):
                spec.name = spec.name.replace('.pure.', '.')

            # TODO need to support loaders from alternate specs, like zip
            # loaders.
            spec.loader = hgloader(spec.name, spec.origin)
            return spec
173 172
    def replacetokens(tokens, fullname):
        """Transform a stream of tokens from raw to Python 3.

        It is called by the custom module loading machinery to rewrite
        source/tokens between source decoding and compilation.

        Returns a generator of possibly rewritten tokens.

        The input token list may be mutated as part of processing. However,
        its changes do not necessarily match the output token stream.

        REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
        OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
        """
        futureimpline = False

        # The following utility functions access the tokens list and i index of
        # the for i, t enumerate(tokens) loop below
        def _isop(j, *o):
            """Assert that tokens[j] is an OP with one of the given values"""
            try:
                return tokens[j].type == token.OP and tokens[j].string in o
            except IndexError:
                return False

        def _findargnofcall(n):
            """Find arg n of a call expression (start at 0)

            Returns index of the first token of that argument, or None if
            there is not that many arguments.

            Assumes that token[i + 1] is '('.

            """
            nested = 0
            for j in range(i + 2, len(tokens)):
                if _isop(j, ')', ']', '}'):
                    # end of call, tuple, subscription or dict / set
                    nested -= 1
                    if nested < 0:
                        return None
                elif n == 0:
                    # this is the starting position of arg
                    return j
                elif _isop(j, '(', '[', '{'):
                    nested += 1
                elif _isop(j, ',') and nested == 0:
                    n -= 1

            return None

        def _ensureunicode(j):
            """Make sure the token at j is a unicode string

            This rewrites a string token to include the unicode literal prefix
            so the string transformer won't add the byte prefix.

            Ignores tokens that are not strings. Assumes bounds checking has
            already been done.

            """
            st = tokens[j]
            if st.type == token.STRING and st.string.startswith(("'", '"')):
                tokens[j] = st._replace(string='u%s' % st.string)

        for i, t in enumerate(tokens):
            # Convert most string literals to byte literals. String literals
            # in Python 2 are bytes. String literals in Python 3 are unicode.
            # Most strings in Mercurial are bytes and unicode strings are rare.
            # Rather than rewrite all string literals to use ``b''`` to indicate
            # byte strings, we apply this token transformer to insert the ``b``
            # prefix nearly everywhere.
            if t.type == token.STRING:
                s = t.string

                # Preserve docstrings as string literals. This is inconsistent
                # with regular unprefixed strings. However, the
                # "from __future__" parsing (which allows a module docstring to
                # exist before it) doesn't properly handle the docstring if it
                # is b''' prefixed, leading to a SyntaxError. We leave all
                # docstrings as unprefixed to avoid this. This means Mercurial
                # components touching docstrings need to handle unicode,
                # unfortunately.
                if s[0:3] in ("'''", '"""'):
                    yield t
                    continue

                # If the first character isn't a quote, it is likely a string
                # prefixing character (such as 'b', 'u', or 'r'. Ignore.
                if s[0] not in ("'", '"'):
                    yield t
                    continue

                # String literal. Prefix to make a b'' string.
                yield t._replace(string='b%s' % t.string)
                continue

            # Insert compatibility imports at "from __future__ import" line.
            # No '\n' should be added to preserve line numbers.
            if (t.type == token.NAME and t.string == 'import' and
                all(u.type == token.NAME for u in tokens[i - 2:i]) and
                [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
                futureimpline = True
            if t.type == token.NEWLINE and futureimpline:
                futureimpline = False
                # pycompat itself must not import from pycompat.
                if fullname == 'mercurial.pycompat':
                    yield t
                    continue
                r, c = t.start
                l = (b'; from mercurial.pycompat import '
                     b'delattr, getattr, hasattr, setattr, xrange, '
                     b'open, unicode\n')
                # Re-tokenize the injected source and splice its tokens in,
                # shifted to the current row/column.
                for u in tokenize.tokenize(io.BytesIO(l).readline):
                    if u.type in (tokenize.ENCODING, token.ENDMARKER):
                        continue
                    yield u._replace(
                        start=(r, c + u.start[1]), end=(r, c + u.end[1]))
                continue

            # This looks like a function call.
            if t.type == token.NAME and _isop(i + 1, '('):
                fn = t.string

                # *attr() builtins don't accept byte strings to 2nd argument.
                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                        not _isop(i - 1, '.')):
                    arg1idx = _findargnofcall(1)
                    if arg1idx is not None:
                        _ensureunicode(arg1idx)

                # .encode() and .decode() on str/bytes/unicode don't accept
                # byte strings on Python 3.
                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                    for argn in range(2):
                        argidx = _findargnofcall(argn)
                        if argidx is not None:
                            _ensureunicode(argidx)

                # It changes iteritems/values to items/values as they are not
                # present in Python 3 world.
                elif fn in ('iteritems', 'itervalues'):
                    yield t._replace(string=fn[4:])
                    continue

            # Emit unmodified token.
            yield t
320 319
    # Header to add to bytecode files. This MUST be changed when
    # ``replacetoken`` or any mechanism that changes semantics of module
    # loading is changed. Otherwise cached bytecode may get loaded without
    # the new transformation mechanisms applied.
    BYTECODEHEADER = b'HG\x00\x0a'
326 325
    class hgloader(importlib.machinery.SourceFileLoader):
        """Custom module loader that transforms source code.

        When the source code is converted to a code object, we transform
        certain patterns to be Python 3 compatible. This allows us to write code
        that is natively Python 2 and compatible with Python 3 without
        making the code excessively ugly.

        We do this by transforming the token stream between parse and compile.

        Implementing transformations invalidates caching assumptions made
        by the built-in importer. The built-in importer stores a header on
        saved bytecode files indicating the Python/bytecode version. If the
        version changes, the cached bytecode is ignored. The Mercurial
        transformations could change at any time. This means we need to check
        that cached bytecode was generated with the current transformation
        code or there could be a mismatch between cached bytecode and what
        would be generated from this class.

        We supplement the bytecode caching layer by wrapping ``get_data``
        and ``set_data``. These functions are called when the
        ``SourceFileLoader`` retrieves and saves bytecode cache files,
        respectively. We simply add an additional header on the file. As
        long as the version in this file is changed when semantics change,
        cached bytecode should be invalidated when transformations change.

        The added header has the form ``HG<VERSION>``. That is a literal
        ``HG`` with 2 binary bytes indicating the transformation version.
        """
        def get_data(self, path):
            """Read a file, validating/stripping our bytecode cache header."""
            data = super(hgloader, self).get_data(path)

            if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                return data

            # There should be a header indicating the Mercurial transformation
            # version. If it doesn't exist or doesn't match the current version,
            # we raise an OSError because that is what
            # ``SourceFileLoader.get_code()`` expects when loading bytecode
            # paths to indicate the cached file is "bad."
            if data[0:2] != b'HG':
                raise OSError('no hg header')
            if data[0:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')

            return data[4:]

        def set_data(self, path, data, *args, **kwargs):
            """Write a file, prepending our header to bytecode cache files."""
            if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                data = BYTECODEHEADER + data

            return super(hgloader, self).set_data(path, data, *args, **kwargs)

        def source_to_code(self, data, path):
            """Perform token transformation before compilation."""
            buf = io.BytesIO(data)
            tokens = tokenize.tokenize(buf.readline)
            data = tokenize.untokenize(replacetokens(list(tokens), self.name))
            # Python's built-in importer strips frames from exceptions raised
            # for this code. Unfortunately, that mechanism isn't extensible
            # and our frame will be blamed for the import failure. There
            # are extremely hacky ways to do frame stripping. We haven't
            # implemented them because they are very ugly.
            return super(hgloader, self).source_to_code(data, path)
391 390
# We automagically register our custom importer as a side-effect of loading.
# This is necessary to ensure that any entry points are able to import
# mercurial.* modules without having to perform this registration themselves.
if sys.version_info[0] >= 3:
    _importercls = hgpathentryfinder
else:
    _importercls = hgimporter
# Guard against double registration when this module is imported twice.
if not any(isinstance(x, _importercls) for x in sys.meta_path):
    # meta_path is used before any implicit finders and before sys.path.
    sys.meta_path.insert(0, _importercls())
1 NO CONTENT: file renamed from mercurial/diffhelpers.c to mercurial/cext/diffhelpers.c
@@ -1,2741 +1,2743
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import copy
13 13 import email
14 14 import errno
15 15 import hashlib
16 16 import os
17 17 import posixpath
18 18 import re
19 19 import shutil
20 20 import tempfile
21 21 import zlib
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 hex,
26 26 short,
27 27 )
28 28 from . import (
29 29 copies,
30 diffhelpers,
31 30 encoding,
32 31 error,
33 32 mail,
34 33 mdiff,
35 34 pathutil,
35 policy,
36 36 pycompat,
37 37 scmutil,
38 38 similar,
39 39 util,
40 40 vfs as vfsmod,
41 41 )
42
# C or pure-Python diffhelpers implementation, chosen by the module policy.
diffhelpers = policy.importmod(r'diffhelpers')
stringio = util.stringio

# 'diff --git a/<src> b/<dst>' header; groups capture the two paths.
gitre = re.compile(br'diff --git a/(.*) b/(.*)')
tabsplitter = re.compile(br'(\t+|[^\t]+)')
46 48
class PatchError(Exception):
    """Error raised while parsing or applying a patch."""
49 51
50 52
51 53 # public functions
52 54
def split(stream):
    '''return an iterator of individual patches from a stream'''
    def isheader(line, inheader):
        # Matches RFC-822-style "Key: value" lines and their continuations.
        if inheader and line[0] in (' ', '\t'):
            # continuation
            return True
        if line[0] in (' ', '-', '+'):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(': ', 1)
        return len(l) == 2 and ' ' not in l[0]

    def chunk(lines):
        # One patch as a file-like object.
        return stringio(''.join(lines))

    def hgsplit(stream, cur):
        # Split on '# HG changeset patch' markers (hg export output).
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith('# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # Split on mbox 'From ' separators, recursing into each message.
        for line in stream:
            if line.startswith('From '):
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        # Let the email parser walk MIME parts; yield patch-like parts.
        def msgfp(m):
            fp = stringio()
            g = email.Generator.Generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = email.Parser.Parser().parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
            for part in m.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        # Split on runs of header lines when no other marker was found.
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        yield chunk(cur)

    class fiter(object):
        # Python 2 iterator (next()) adapter over objects that only
        # provide readline().
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

    inheader = False
    cur = []

    mimeheaders = ['content-type']

    if not util.safehasattr(stream, 'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    for line in stream:
        cur.append(line)
        if line.startswith('# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith('From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith('--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
179 181
## Some facility for extensible patch parsing:
# list of pairs ("header to match", "data key")
# Consumed by extract() when reading '# <header> ' lines of an
# hg-export patch header.
patchheadermap = [('Date', 'date'),
                  ('Branch', 'branch'),
                  ('Node ID', 'nodeid'),
                  ]
186 188
def extract(ui, fileobj):
    '''extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    return a dictionary. Standard keys are:
      - filename,
      - message,
      - user,
      - date,
      - branch,
      - node,
      - p1,
      - p2.
    Any item can be missing from the dictionary. If filename is missing,
    fileobj did not contain a patch. Caller must unlink filename when done.'''

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |'
                        r'retrieving revision [0-9]+(\.[0-9]+)*$|'
                        r'---[ \t].*?^\+\+\+[ \t]|'
                        r'\*\*\*[ \t].*?^---[ \t])', re.MULTILINE|re.DOTALL)

    data = {}
    # The diff content is accumulated into this temp file; its name is
    # returned as data['filename'] when any diff was seen.
    fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
    tmpfp = os.fdopen(fd, pycompat.sysstr('w'))
    try:
        msg = email.Parser.Parser().parse(fileobj)

        subject = msg['Subject'] and mail.headdecode(msg['Subject'])
        data['user'] = msg['From'] and mail.headdecode(msg['From'])
        if not subject and not data['user']:
            # Not an email, restore parsed headers if any
            subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'

        # should try to parse msg['Date']
        parents = []

        if subject:
            if subject.startswith('[PATCH'):
                pend = subject.find(']')
                if pend >= 0:
                    subject = subject[pend + 1:].lstrip()
            subject = re.sub(r'\n[ \t]+', ' ', subject)
            ui.debug('Subject: %s\n' % subject)
        if data['user']:
            ui.debug('From: %s\n' % data['user'])
        diffs_seen = 0
        ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
        message = ''
        for part in msg.walk():
            content_type = part.get_content_type()
            ui.debug('Content-Type: %s\n' % content_type)
            if content_type not in ok_types:
                continue
            payload = part.get_payload(decode=True)
            m = diffre.search(payload)
            if m:
                hgpatch = False
                hgpatchheader = False
                ignoretext = False

                ui.debug('found patch at byte %d\n' % m.start(0))
                diffs_seen += 1
                cfp = stringio()
                # Everything before the diff start is commit message
                # and/or an hg-export header block.
                for line in payload[:m.start(0)].splitlines():
                    if line.startswith('# HG changeset patch') and not hgpatch:
                        ui.debug('patch generated by hg export\n')
                        hgpatch = True
                        hgpatchheader = True
                        # drop earlier commit message content
                        cfp.seek(0)
                        cfp.truncate()
                        subject = None
                    elif hgpatchheader:
                        if line.startswith('# User '):
                            data['user'] = line[7:]
                            ui.debug('From: %s\n' % data['user'])
                        elif line.startswith("# Parent "):
                            parents.append(line[9:].lstrip())
                        elif line.startswith("# "):
                            for header, key in patchheadermap:
                                prefix = '# %s ' % header
                                if line.startswith(prefix):
                                    data[key] = line[len(prefix):]
                        else:
                            hgpatchheader = False
                    elif line == '---':
                        ignoretext = True
                    if not hgpatchheader and not ignoretext:
                        cfp.write(line)
                        cfp.write('\n')
                message = cfp.getvalue()
                if tmpfp:
                    tmpfp.write(payload)
                    if not payload.endswith('\n'):
                        tmpfp.write('\n')
            elif not diffs_seen and message and content_type == 'text/plain':
                message += '\n' + payload
    except: # re-raises
        tmpfp.close()
        os.unlink(tmpname)
        raise

    if subject and not message.startswith(subject):
        message = '%s\n%s' % (subject, message)
    data['message'] = message
    tmpfp.close()
    if parents:
        data['p1'] = parents.pop(0)
    if parents:
        data['p2'] = parents.pop(0)

    if diffs_seen:
        data['filename'] = tmpname
    else:
        os.unlink(tmpname)
    return data
306 308
class patchmeta(object):
    """Metadata describing a single patched file.

    Attributes:
      op      - operation performed: ADD, DELETE, RENAME, MODIFY or COPY
      path    - path of the patched file
      oldpath - source path for COPY/RENAME operations, None otherwise
      mode    - (islink, isexec) pair when the file mode changed (islink
                truthy for symlinks, isexec truthy for executables),
                None otherwise
      binary  - whether the file carries a git binary patch
    """

    def __init__(self, path):
        self.path = path
        self.oldpath = None
        self.mode = None
        self.op = 'MODIFY'
        self.binary = False

    def setmode(self, mode):
        # Keep the raw symlink and owner-execute permission bits.
        self.mode = (mode & 0o20000, mode & 0o100)

    def copy(self):
        clone = patchmeta(self.path)
        clone.oldpath = self.oldpath
        clone.mode = self.mode
        clone.op = self.op
        clone.binary = self.binary
        return clone

    def _ispatchinga(self, afile):
        if afile == '/dev/null':
            # Only additions patch from the null source file.
            return self.op == 'ADD'
        return afile == 'a/' + (self.oldpath or self.path)

    def _ispatchingb(self, bfile):
        if bfile == '/dev/null':
            # Only deletions patch to the null target file.
            return self.op == 'DELETE'
        return bfile == 'b/' + self.path

    def ispatching(self, afile, bfile):
        return self._ispatchinga(afile) and self._ispatchingb(bfile)

    def __repr__(self):
        return "<patchmeta %s %r>" % (self.op, self.path)
352 354
def readgitpatch(lr):
    """extract git-style metadata about patches from <patchname>

    Returns a list of patchmeta objects, one per 'diff --git' section
    seen in the line source lr.
    """

    # Filter patch for git information
    gp = None
    gitpatches = []
    for line in lr:
        line = line.rstrip(' \r\n')
        if line.startswith('diff --git a/'):
            m = gitre.match(line)
            if m:
                if gp:
                    gitpatches.append(gp)
                dst = m.group(2)
                gp = patchmeta(dst)
        elif gp:
            if line.startswith('--- '):
                # Start of the unified diff body: this file's metadata
                # is complete.
                gitpatches.append(gp)
                gp = None
                continue
            if line.startswith('rename from '):
                gp.op = 'RENAME'
                gp.oldpath = line[12:]
            elif line.startswith('rename to '):
                gp.path = line[10:]
            elif line.startswith('copy from '):
                gp.op = 'COPY'
                gp.oldpath = line[10:]
            elif line.startswith('copy to '):
                gp.path = line[8:]
            elif line.startswith('deleted file'):
                gp.op = 'DELETE'
            elif line.startswith('new file mode '):
                gp.op = 'ADD'
                # last 6 characters are the octal mode, e.g. '100644'
                gp.setmode(int(line[-6:], 8))
            elif line.startswith('new mode '):
                gp.setmode(int(line[-6:], 8))
            elif line.startswith('GIT binary patch'):
                gp.binary = True
    if gp:
        gitpatches.append(gp)

    return gitpatches
396 398
class linereader(object):
    """File-like wrapper allowing lines to be pushed back onto the input."""

    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        # A None line means "nothing to push back"; ignore it.
        if line is not None:
            self.buf.append(line)

    def readline(self):
        # Serve pushed-back lines first, oldest first.
        if self.buf:
            return self.buf.pop(0)
        return self.fp.readline()

    def __iter__(self):
        # Iterate until readline() returns the empty string (EOF).
        return iter(self.readline, '')
416 418
class abstractbackend(object):
    """Interface for objects that patched file content is written to."""

    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return a (data, (islink, isexec)) tuple for target file fname.

        data is None when the file is missing or deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to the target file fname with mode (islink, isexec).

        A data of None leaves the content untouched. copysource names
        the original file when the target was copied and then modified.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Remove the target file fname."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Record rejected hunk lines for fname (failed of total hunks).

        The base implementation discards them.
        """

    def exists(self, fname):
        """Tell whether fname exists in the target."""
        raise NotImplementedError
448 450
class fsbackend(abstractbackend):
    """Backend that reads and writes files directly under basedir."""
    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def _join(self, f):
        # Absolute path of f under the backend root.
        return os.path.join(self.opener.base, f)

    def getfile(self, fname):
        # Symlinks report their target as data with the islink flag set.
        if self.opener.islink(fname):
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            # Owner-execute bit determines the isexec flag.
            isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # Missing file: signal with (None, None) per the interface.
            return None, None

    def setfile(self, fname, data, mode, copysource):
        islink, isexec = mode
        if data is None:
            # Content unchanged; only the flags need updating.
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
        else:
            self.opener.write(fname, data)
            if isexec:
                self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        self.opener.unlinkpath(fname, ignoremissing=True)

    def writerej(self, fname, failed, total, lines):
        # Rejected hunks go to '<fname>.rej' next to the target file.
        fname = fname + ".rej"
        self.ui.warn(
            _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
            (failed, total, fname))
        fp = self.opener(fname, 'w')
        fp.writelines(lines)
        fp.close()

    def exists(self, fname):
        return self.opener.lexists(fname)
500 502
class workingbackend(fsbackend):
    """fsbackend over repo.root that also tracks changes for the dirstate."""
    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        # Refuse to patch files that exist but are unknown to the dirstate.
        if self.repo.dirstate[fname] == '?' and self.exists(fname):
            raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def setfile(self, fname, data, mode, copysource):
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        # Flush accumulated copy/remove/change information to the dirstate
        # and return the sorted list of changed files.
        wctx = self.repo[None]
        changed = set(self.changed)
        for src, dst in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
        if self.removed:
            wctx.forget(sorted(self.removed))
            for f in self.removed:
                if f not in self.repo.dirstate:
                    # File was deleted and no longer belongs to the
                    # dirstate, it was probably marked added then
                    # deleted, and should not be considered by
                    # marktouched().
                    changed.discard(f)
        if changed:
            scmutil.marktouched(self.repo, changed, self.similarity)
        return sorted(self.changed)
544 546
class filestore(object):
    """File content store that keeps data in memory until a size budget
    is exceeded, then spills further files to a temporary on-disk vfs.

    The default budget is 4 MiB; a negative maxsize disables spilling
    entirely.
    """

    def __init__(self, maxsize=None):
        if maxsize is None:
            maxsize = 4 * (2 ** 20)
        self.maxsize = maxsize
        self.size = 0
        # fname -> (data, mode, copied) kept in memory
        self.data = {}
        # fname -> (diskname, mode, copied) spilled to disk
        self.files = {}
        # lazily-created temp directory vfs for spilled files
        self.opener = None
        # counter used to generate safe on-disk names
        self.created = 0

    def setfile(self, fname, data, mode, copied=None):
        fits = self.maxsize < 0 or len(data) + self.size <= self.maxsize
        if fits:
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
            return
        if self.opener is None:
            tmproot = tempfile.mkdtemp(prefix='hg-patch-')
            self.opener = vfsmod.vfs(tmproot)
        # Plain counter names sidestep any filename issues on disk.
        diskname = str(self.created)
        self.opener.write(diskname, data)
        self.created += 1
        self.files[fname] = (diskname, mode, copied)

    def getfile(self, fname):
        """Return (data, mode, copied), or (None, None, None) if unknown."""
        try:
            return self.data[fname]
        except KeyError:
            pass
        if self.opener and fname in self.files:
            diskname, mode, copied = self.files[fname]
            return self.opener.read(diskname), mode, copied
        return None, None, None

    def close(self):
        # Drop the spill directory, if one was ever created.
        if self.opener:
            shutil.rmtree(self.opener.base)
581 583
class repobackend(abstractbackend):
    """Backend that patches files of a changectx in memory, writing the
    results into a filestore instead of touching the working directory.
    """

    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo = repo
        # base context that patched files are read from
        self.ctx = ctx
        # filestore receiving patched contents
        self.store = store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        # Only files present in the base context may be patched.
        if fname not in self.ctx:
            raise PatchError(_('cannot patch %s: file is not tracked') % fname)

    def getfile(self, fname):
        """Return (data, (islink, isexec)), or (None, None) if unknown."""
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        return fctx.data(), ('l' in flags, 'x' in flags)

    def setfile(self, fname, data, mode, copysource):
        if copysource:
            self._checkknown(copysource)
        if data is None:
            # Flag-only change: keep the existing content.
            data = self.ctx[fname].data()
        self.store.setfile(fname, data, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        # All files the patch touched, changed and removed alike.
        return self.changed | self.removed
623 625
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile('@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# range line of a context diff, e.g. "*** 1,4 ****" or "--- 2,6 ----"
# (matched as a prefix)
contextdesc = re.compile('(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# supported end-of-line handling modes for patched files
eolmodes = ['strict', 'crlf', 'lf', 'auto']
628 630
class patchfile(object):
    """State for applying hunks to a single file.

    File content is read through backend (or store, for copy sources),
    patched in memory line by line, and written back on close().
    Rejected hunks are collected and saved to a .rej file.
    """

    def __init__(self, ui, gp, backend, store, eolmode='strict'):
        self.fname = gp.path
        self.eolmode = eolmode
        # EOL style detected from the first line of the file, if any
        self.eol = None
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in ('ADD', 'COPY', 'RENAME')
        self.remove = gp.op == 'DELETE'
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                if self.lines[0].endswith('\r\n'):
                    self.eol = '\r\n'
                elif self.lines[0].endswith('\n'):
                    self.eol = '\n'
                if eolmode != 'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith('\r\n'):
                            l = l[:-2] + '\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(_("(use '--prefix' to apply patch relative to the "
                           "current directory)\n"))

        # line content -> list of line numbers, built lazily in apply()
        self.hash = {}
        self.dirty = 0
        # cumulative line-count drift caused by already-applied hunks
        self.offset = 0
        # drift between expected and actual position of the last hunk
        self.skew = 0
        self.rej = []
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0

    def writelines(self, fname, lines, mode):
        # Pick the EOL to emit based on the configured mode; 'auto'
        # reuses whatever was detected on input.
        if self.eolmode == 'auto':
            eol = self.eol
        elif self.eolmode == 'crlf':
            eol = '\r\n'
        else:
            eol = '\n'

        if self.eolmode != 'strict' and eol and eol != '\n':
            rawlines = []
            for l in lines:
                if l and l[-1] == '\n':
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, ''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        # Print "patching file X" at most once per file; warnings force
        # it out, otherwise it only shows in verbose mode.
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _("patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)


    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines. The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1). This always
        # creates rejects in the same form as the original patch. A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = ["--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                if l[-1:] != '\n':
                    lines.append("\n\ No newline at end of file\n")
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply hunk h.

        Returns 0 on clean success, the fuzz amount on fuzzy success,
        and -1 when the hunk was rejected.
        """
        if not h.complete():
            raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
                             (h.number, h.desc, len(h.a), h.lena, len(h.b),
                              h.lenb))

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(_("cannot create %s: destination already "
                               "exists\n") % self.fname)
            else:
                self.ui.warn(_("file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            # Binary hunks replace the whole content; no fuzzing.
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        horig = h
        if (self.eolmode in ('crlf', 'lf')
            or self.eolmode == 'auto' and self.eol):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if (self.skew == 0 and
            diffhelpers.testhunk(old, self.lines, oldstart) == 0):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart:oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        for fuzzlen in xrange(self.ui.configint("patch", "fuzz", 2) + 1):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelpers.testhunk(old, self.lines, l) == 0:
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _("Hunk #%d succeeded at %d "
                                    "with fuzz %d "
                                    "(offset %d lines).\n")
                            self.printfile(True)
                            self.ui.warn(msg %
                                         (h.number, l + 1, fuzzlen, offset))
                        else:
                            msg = _("Hunk #%d succeeded at %d "
                                    "(offset %d lines).\n")
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1

    def close(self):
        # Flush patched lines and rejects; return the reject count.
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
845 847
class header(object):
    """A patch header: the 'diff ...' preamble lines of one file, plus
    the hunks attached to it.
    """
    diffgit_re = re.compile('diff --git a/(.*) b/(.*)$')
    diff_re = re.compile('diff -r .* (.*)$')
    allhunks_re = re.compile('(?:index|deleted file) ')
    pretty_re = re.compile('(?:new file|deleted file) ')
    special_re = re.compile('(?:index|deleted|copy|rename) ')
    newfile_re = re.compile('(?:new file)')

    def __init__(self, header):
        self.header = header
        self.hunks = []

    def binary(self):
        # 'index ' lines only appear for git binary diffs here.
        return any(h.startswith('index ') for h in self.header)

    def pretty(self, fp):
        # Human-oriented rendering: stop early with a summary line for
        # binary files, new/deleted files, or at the '---' marker.
        for h in self.header:
            if h.startswith('index '):
                fp.write(_('this modifies a binary file (all or nothing)\n'))
                break
            elif self.pretty_re.match(h):
                fp.write(h)
                if self.binary():
                    fp.write(_('this is a binary file\n'))
                break
            elif h.startswith('---'):
                changed = sum([max(hk.added, hk.removed) for hk in self.hunks])
                fp.write(_('%d hunks, %d lines changed\n') %
                         (len(self.hunks), changed))
                break
            fp.write(h)

    def write(self, fp):
        # Raw rendering: the header exactly as parsed.
        fp.write(''.join(self.header))

    def allhunks(self):
        # True when the file must be taken or left as a whole.
        return any(self.allhunks_re.match(h) for h in self.header)

    def files(self):
        firstline = self.header[0]
        m = self.diffgit_re.match(firstline)
        if not m:
            return self.diff_re.match(firstline).groups()
        fromfile, tofile = m.groups()
        if fromfile == tofile:
            return [fromfile]
        return [fromfile, tofile]

    def filename(self):
        return self.files()[-1]

    def __repr__(self):
        return '<header %s>' % ' '.join([repr(f) for f in self.files()])

    def isnewfile(self):
        return any(self.newfile_re.match(h) for h in self.header)

    def special(self):
        # Special files are shown only at the header level and not at the
        # hunk level: for example, a deleted file is special, and the user
        # can only take or leave the deletion as a whole.  A newly added
        # file is special only while it is empty; once it has content the
        # user should be able to edit it hunk by hunk.
        nocontent = len(self.header) == 2
        if self.isnewfile() and nocontent:
            return True
        return any(self.special_re.match(h) for h in self.header)
917 919
class recordhunk(object):
    """patch hunk

    XXX shouldn't we merge this with the other hunk class?
    """
    # maximum number of context lines a trimmed hunk would keep
    maxcontext = 3

    def __init__(self, header, fromline, toline, proc, before, hunk, after):
        def trimcontext(number, lines):
            delta = len(lines) - self.maxcontext
            # NOTE(review): context trimming is deliberately disabled by
            # the 'False and' guard below, so hunks keep their full
            # context; confirm intent before re-enabling.
            if False and delta > 0:
                return number + delta, lines[:self.maxcontext]
            return number, lines

        self.header = header
        self.fromline, self.before = trimcontext(fromline, before)
        self.toline, self.after = trimcontext(toline, after)
        self.proc = proc
        self.hunk = hunk
        self.added, self.removed = self.countchanges(self.hunk)

    def __eq__(self, v):
        if not isinstance(v, recordhunk):
            return False

        return ((v.hunk == self.hunk) and
                (v.proc == self.proc) and
                (self.fromline == v.fromline) and
                (self.header.files() == v.header.files()))

    def __hash__(self):
        return hash((tuple(self.hunk),
                     tuple(self.header.files()),
                     self.fromline,
                     self.proc))

    def countchanges(self, hunk):
        """hunk -> (n+,n-)"""
        add = len([h for h in hunk if h[0] == '+'])
        rem = len([h for h in hunk if h[0] == '-'])
        return add, rem

    def write(self, fp):
        # The no-newline marker is not counted in the hunk lengths.
        delta = len(self.before) + len(self.after)
        if self.after and self.after[-1] == '\\ No newline at end of file\n':
            delta -= 1
        fromlen = delta + self.removed
        tolen = delta + self.added
        fp.write('@@ -%d,%d +%d,%d @@%s\n' %
                 (self.fromline, fromlen, self.toline, tolen,
                  self.proc and (' ' + self.proc)))
        fp.write(''.join(self.before + self.hunk + self.after))

    pretty = write

    def filename(self):
        return self.header.filename()

    def __repr__(self):
        return '<hunk %r@%d>' % (self.filename(), self.fromline)
978 980
def filterpatch(ui, headers, operation=None):
    """Interactively filter patch chunks into applied-only chunks

    operation selects the prompt wording ('record', 'discard' or
    'revert'); returns (kept chunks, {}).
    """
    if operation is None:
        operation = 'record'
    # Prompt strings keyed by situation, pre-selected for the operation.
    messages = {
        'multiple': {
            'discard': _("discard change %d/%d to '%s'?"),
            'record': _("record change %d/%d to '%s'?"),
            'revert': _("revert change %d/%d to '%s'?"),
        }[operation],
        'single': {
            'discard': _("discard this change to '%s'?"),
            'record': _("record this change to '%s'?"),
            'revert': _("revert this change to '%s'?"),
        }[operation],
        'help': {
            'discard': _('[Ynesfdaq?]'
                         '$$ &Yes, discard this change'
                         '$$ &No, skip this change'
                         '$$ &Edit this change manually'
                         '$$ &Skip remaining changes to this file'
                         '$$ Discard remaining changes to this &file'
                         '$$ &Done, skip remaining changes and files'
                         '$$ Discard &all changes to all remaining files'
                         '$$ &Quit, discarding no changes'
                         '$$ &? (display help)'),
            'record': _('[Ynesfdaq?]'
                        '$$ &Yes, record this change'
                        '$$ &No, skip this change'
                        '$$ &Edit this change manually'
                        '$$ &Skip remaining changes to this file'
                        '$$ Record remaining changes to this &file'
                        '$$ &Done, skip remaining changes and files'
                        '$$ Record &all changes to all remaining files'
                        '$$ &Quit, recording no changes'
                        '$$ &? (display help)'),
            'revert': _('[Ynesfdaq?]'
                        '$$ &Yes, revert this change'
                        '$$ &No, skip this change'
                        '$$ &Edit this change manually'
                        '$$ &Skip remaining changes to this file'
                        '$$ Revert remaining changes to this &file'
                        '$$ &Done, skip remaining changes and files'
                        '$$ Revert &all changes to all remaining files'
                        '$$ &Quit, reverting no changes'
                        '$$ &? (display help)')
        }[operation]
    }

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages['help']
            r = ui.promptchoice("%s %s" % (query, resps))
            ui.write("\n")
            if r == 8: # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write('%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0: # yes
                ret = True
            elif r == 1: # no
                ret = False
            elif r == 2: # Edit patch
                if chunk is None:
                    ui.write(_('cannot edit patch for whole file'))
                    ui.write("\n")
                    continue
                if chunk.header.binary():
                    ui.write(_('cannot edit patch for binary file'))
                    ui.write("\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = '---' + _("""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
""")
                (patchfd, patchfn) = tempfile.mkstemp(prefix="hg-editor-",
                                                      suffix=".diff", text=True)
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = os.fdopen(patchfd, pycompat.sysstr("w"))
                    chunk.header.write(f)
                    chunk.write(f)
                    f.write('\n'.join(['# ' + i for i in phelp.splitlines()]))
                    f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system("%s \"%s\"" % (editor, patchfn),
                                    environ={'HGUSER': ui.username()},
                                    blockedtag='filterpatch')
                    if ret != 0:
                        ui.warn(_("editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn)
                    ncpatchfp = stringio()
                    for line in util.iterfile(patchfp):
                        if not line.startswith('#'):
                            ncpatchfp.write(line)
                    patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3: # Skip
                ret = skipfile = False
            elif r == 4: # file (Record remaining)
                ret = skipfile = True
            elif r == 5: # done, skip remaining
                ret = skipall = False
            elif r == 6: # all
                ret = skipall = True
            elif r == 7: # quit
                raise error.Abort(_('user quit'))
            return ret, skipfile, skipall, newpatches

    seen = set()
    applied = {}        # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        # fixoffset tracks the line drift caused by skipped chunks so
        # later chunks of the same file keep consistent target lines.
        fixoffset = 0
        hdr = ''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        msg = (_('examine changes to %s?') %
               _(' and ').join("'%s'" % f for f in h.files()))
        r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages['single'] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages['multiple'] % (idx, total, chunk.filename())
            r, skipfile, skipall, newpatches = prompt(skipfile,
                                                      skipall, msg, chunk)
            if r:
                if fixoffset:
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added
    # Keep only files with at least one accepted hunk (or special files).
    return (sum([h for h in applied.itervalues()
                 if h[0].special() or len(h) > 1], []), {})
class hunk(object):
    """A text hunk parsed from a unified or context diff.

    self.a holds the old-side lines and self.b the new-side lines, both
    prefixed; self.hunk keeps the raw hunk text starting with the
    description line.
    """

    def __init__(self, desc, num, lr, context):
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            # context selects context-diff vs unified-diff parsing
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            nlines = []
            for line in lines:
                if line.endswith('\r\n'):
                    line = line[:-2] + '\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        # missing lengths default to 1 per the unified diff format
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        diffhelpers.addlines(lr, self.hunk, self.lena, self.lenb, self.a,
                             self.b)
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length. Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        # read the old-side block
        for x in xrange(self.lena):
            l = lr.readline()
            if l.startswith('---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            s = l[2:]
            if l.startswith('- ') or l.startswith('! '):
                u = '-' + s
            elif l.startswith(' '):
                u = ' ' + s
            else:
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith('\ '):
            # no-newline-at-eof marker: strip the trailing newline
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_("bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        hunki = 1
        # read the new-side block, merging it into self.hunk in order
        for x in xrange(self.lenb):
            l = lr.readline()
            if l.startswith('\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith('+ ') or l.startswith('! '):
                u = '+' + s
            elif l.startswith(' '):
                u = ' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                # NOTE(review): this loop reads the new side of the hunk
                # but reuses the "old text" wording of the error above;
                # confirm before changing the translated message.
                raise PatchError(_("bad hunk #%d old text line %d") %
                                 (self.number, x))
            self.b.append(s)
            while True:
                if hunki >= len(self.hunk):
                    h = ""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith('-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith('-') or x.startswith(' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith('+') or x.startswith(' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
                                               self.startb, self.lenb)
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        # Consume a trailing "\ No newline at end of file" marker, if any.
        l = lr.readline()
        if l.startswith('\ '):
            diffhelpers.fix_newline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        # this removes context lines from the top and bottom of list 'l'. It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1][0] == ' ':
                    top += 1
                else:
                    break
            if not toponly:
                for x in xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1][0] == ' ':
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top:len(old) - bot], new[top:len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        # Return (old, oldstart, new, newstart) with up to fuzz context
        # lines trimmed; starts are converted to 0-based indexes.
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart
1386 1388
class binhunk(object):
    'A binary patch file.'
    def __init__(self, lr, fname):
        # decompressed payload, set once _read() succeeds
        self.text = None
        # True for 'delta' hunks (applied against existing content)
        self.delta = False
        self.hunk = ['GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        return self.text is not None

    def new(self, lines):
        """Return the patched content as a single-element list."""
        if self.delta:
            return [applybindelta(self.text, ''.join(lines))]
        return [self.text]

    def _read(self, lr):
        def getline(lr, hunk):
            # read one raw line, record it in the hunk, return it stripped
            l = lr.readline()
            hunk.append(l)
            return l.rstrip('\r\n')

        size = 0
        # scan for the 'literal <size>' or 'delta <size>' header line
        while True:
            line = getline(lr, self.hunk)
            if not line:
                raise PatchError(_('could not extract "%s" binary data')
                                 % self._fname)
            if line.startswith('literal '):
                size = int(line[8:].rstrip())
                break
            if line.startswith('delta '):
                size = int(line[6:].rstrip())
                self.delta = True
                break
        dec = []
        line = getline(lr, self.hunk)
        while len(line) > 1:
            # first char encodes the decoded length of this base85 line:
            # 'A'-'Z' -> 1-26, 'a'-'z' -> 27-52
            l = line[0]
            if l <= 'Z' and l >= 'A':
                l = ord(l) - ord('A') + 1
            else:
                l = ord(l) - ord('a') + 27
            try:
                dec.append(util.b85decode(line[1:])[:l])
            except ValueError as e:
                raise PatchError(_('could not decode "%s" binary patch: %s')
                                 % (self._fname, str(e)))
            line = getline(lr, self.hunk)
        text = zlib.decompress(''.join(dec))
        if len(text) != size:
            raise PatchError(_('"%s" length is %d bytes, should be %d')
                             % (self._fname, len(text), size))
        self.text = text
1442 1444
def parsefilename(str):
    """Extract the filename from a '--- ' / '+++ ' patch header line.

    The four-character prefix is dropped, the line terminator stripped,
    and anything after the first tab (or, failing that, the first
    space) is treated as trailing junk and discarded.
    """
    rest = str[4:].rstrip('\r\n')
    for sep in ('\t', ' '):
        cut = rest.find(sep)
        if cut >= 0:
            return rest[:cut]
    return rest
1452 1454
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = """diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch(rawpatch)
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...      c.write(fp)
    >>> fp.seek(0)
    >>> reversedpatch = fp.read()
    >>> print reversedpatch
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -1,6 +2,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -5,3 +6,2 @@
     5
     d
    -lastline

    '''

    # imported here rather than at module level, presumably to avoid an
    # import cycle with crecord -- TODO confirm
    from . import crecord as crecordmod
    newhunks = []
    for c in hunks:
        if isinstance(c, crecordmod.uihunk):
            # curses hunks encapsulate the record hunk in _hunk
            c = c._hunk
        if isinstance(c, recordhunk):
            # flip each +/- line in place and swap the added/removed counts;
            # note this mutates the hunk objects passed in
            for j, line in enumerate(c.hunk):
                if line.startswith("-"):
                    c.hunk[j] = "+" + c.hunk[j][1:]
                elif line.startswith("+"):
                    c.hunk[j] = "-" + c.hunk[j][1:]
            c.added, c.removed = c.removed, c.added
        newhunks.append(c)
    return newhunks
1524 1526
def parsepatch(originalchunks):
    """patch -> [] of headers -> [] of hunks """
    class parser(object):
        """patch parsing state machine"""
        def __init__(self):
            self.fromline = 0
            self.toline = 0
            self.proc = ''
            self.header = None
            self.context = []
            self.before = []
            self.hunk = []
            self.headers = []

        def addrange(self, limits):
            # record the @@ -a,b +c,d @@ start lines for the coming hunk
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            # context after a hunk terminates that hunk: flush it onto the
            # current header and advance the line counters
            if self.hunk:
                h = recordhunk(self.header, self.fromline, self.toline,
                               self.proc, self.before, self.hunk, context)
                self.header.hunks.append(h)
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            # +/- lines start a hunk; any pending context becomes the
            # leading context of this hunk
            if self.context:
                self.before = self.context
                self.context = []
            self.hunk = hunk

        def newfile(self, hdr):
            # flush whatever hunk is pending, then start a fresh header
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        # maps (current state -> event from scanpatch) to the handler;
        # missing entries are invalid transitions
        transitions = {
            'file': {'context': addcontext,
                     'file': newfile,
                     'hunk': addhunk,
                     'range': addrange},
            'context': {'file': newfile,
                        'hunk': addhunk,
                        'range': addrange,
                        'other': addother},
            'hunk': {'context': addcontext,
                     'file': newfile,
                     'range': addrange},
            'range': {'context': addcontext,
                      'hunk': addhunk},
            'other': {'other': addother},
            }

    p = parser()
    fp = stringio()
    fp.write(''.join(originalchunks))
    fp.seek(0)

    state = 'context'
    for newstate, data in scanpatch(fp):
        try:
            # handlers are unbound methods stored in the class dict, hence
            # the explicit 'p' first argument
            p.transitions[state][newstate](p, data)
        except KeyError:
            raise PatchError('unhandled transition: %s -> %s' %
                             (state, newstate))
        state = newstate
    del fp
    return p.finished()
1607 1609
def pathtransform(path, strip, prefix):
    '''turn a path from a patch into a path suitable for the repository

    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform('a/b/c', 0, '')
    ('', 'a/b/c')
    >>> pathtransform(' a/b/c ', 0, '')
    ('', ' a/b/c')
    >>> pathtransform(' a/b/c ', 2, '')
    ('a/b/', 'c')
    >>> pathtransform('a/b/c', 0, 'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(' a//b/c ', 2, 'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform('a/b/c', 3, '')
    Traceback (most recent call last):
    PatchError: unable to strip away 1 of 3 dirs from a/b/c
    '''
    if strip == 0:
        return '', prefix + path.rstrip()
    pathlen = len(path)
    i = 0
    # advance i past 'strip' leading path components
    for done in range(strip):
        i = path.find('/', i)
        if i == -1:
            raise PatchError(_("unable to strip away %d of %d dirs from %s") %
                             (strip - done, strip, path))
        i += 1
        # a run of slashes counts as a single separator
        while i < pathlen - 1 and path[i] == '/':
            i += 1
    return path[:i].lstrip(), prefix + path[i:].rstrip()
1645 1647
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """Build a patchmeta for a plain (non-git) patch target.

    Picks which of afile/bfile to patch based on which side exists in
    'backend', whether either side is /dev/null, and a couple of
    heuristics for quirky diff producers.  Raises PatchError when neither
    a source nor a destination file can be determined.
    """
    nulla = afile_orig == "/dev/null"
    nullb = bfile_orig == "/dev/null"
    # a creation/removal is signalled by /dev/null plus a 0,0 hunk range
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    if afile == bfile:
        goodb = gooda
    else:
        goodb = not nullb and backend.exists(bfile)
    missing = not goodb and not gooda and not create

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[:afile.rfind('/') + 1]
    bbasedir = bfile[:bfile.rfind('/') + 1]
    if (missing and abasedir == bbasedir and afile.startswith(bfile)
        and hunk.starta == 0 and hunk.lena == 0):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = (abase == bbase and bfile.startswith(afile))
    fname = None
    if not missing:
        if gooda and goodb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif gooda:
            fname = afile

    if not fname:
        # fall back to whichever side is not /dev/null
        if not nullb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_("undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = 'ADD'
    elif remove:
        gp.op = 'DELETE'
    return gp
1700 1702
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file', [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk', [hunk_lines])
    - ('range', (-start,len, +start,len, proc))
    """
    lines_re = re.compile(r'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def scanwhile(first, p):
        """scan lr while predicate holds"""
        lines = [first]
        for line in iter(lr.readline, ''):
            if p(line):
                lines.append(line)
            else:
                # not ours: push it back for the outer loop to classify
                lr.push(line)
                break
        return lines

    for line in iter(lr.readline, ''):
        if line.startswith('diff --git a/') or line.startswith('diff -r '):
            def notheader(line):
                s = line.split(None, 1)
                return not s or s[0] not in ('---', 'diff')
            # collect extended header lines up to the next '---' or 'diff'
            header = scanwhile(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith('---'):
                tofile = lr.readline()
                header += [fromfile, tofile]
            else:
                lr.push(fromfile)
            yield 'file', header
        elif line[0] == ' ':
            # ' ' context line; '\' is the "No newline at end of file" marker
            yield 'context', scanwhile(line, lambda l: l[0] in ' \\')
        elif line[0] in '-+':
            yield 'hunk', scanwhile(line, lambda l: l[0] in '-+\\')
        else:
            m = lines_re.match(line)
            if m:
                yield 'range', m.groups()
            else:
                yield 'other', line
1746 1748
def scangitpatch(lr, firstline):
    """
    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.
    """
    pos = 0
    fp = lr.fp
    try:
        pos = fp.tell()
    except IOError:
        # unseekable input: slurp everything into a rewindable buffer
        fp = stringio(lr.fp.read())
    gitreader = linereader(fp)
    gitreader.push(firstline)
    gitpatches = readgitpatch(gitreader)
    # rewind so the caller can re-read the patch body from the start
    fp.seek(pos)
    return gitpatches
1772 1774
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.
    """
    afile = ""
    bfile = ""
    state = None
    hunknum = 0
    emitfile = newfile = False
    # pending git metadata records, in reverse order so the next expected
    # one is always at the end (see the pop() calls below)
    gitpatches = None

    # our states
    BFILE = 1
    # None: diff style unknown yet; False: unified; True: context diff
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, ''):
        if state == BFILE and (
            (not context and x[0] == '@')
            or (context is not False and x.startswith('***************'))
            or x.startswith('GIT binary patch')):
            # a hunk for the currently selected file
            gp = None
            if (gitpatches and
                gitpatches[-1].ispatching(afile, bfile)):
                gp = gitpatches.pop()
            if x.startswith('GIT binary patch'):
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith('***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                # first hunk for this file: announce the file first
                emitfile = False
                yield 'file', (afile, bfile, h, gp and gp.copy() or None)
            yield 'hunk', h
        elif x.startswith('diff --git a/'):
            m = gitre.match(x.rstrip(' \r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield 'git', [g.copy() for g in gitpatches
                              if g.op in ('COPY', 'RENAME')]
                gitpatches.reverse()
            afile = 'a/' + m.group(1)
            bfile = 'b/' + m.group(2)
            # flush metadata-only entries (e.g. mode changes) that precede
            # the file this header is about
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
            if not gitpatches:
                raise PatchError(_('failed to synchronize metadata for "%s"')
                                 % afile[2:])
            gp = gitpatches[-1]
            newfile = True
        elif x.startswith('---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith('+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith('***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith('---'):
                lr.push(l2)
                continue
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith("***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # emit any remaining metadata-only git entries
    while gitpatches:
        gp = gitpatches.pop()
        yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1868 1870
def applybindelta(binchunk, data):
    """Apply a binary delta hunk
    The algorithm used is the algorithm from git's patch-delta.c
    """
    def skipheader(chunk):
        # a header varint is a run of bytes with the high bit set,
        # terminated by one byte with the high bit clear; return how many
        # bytes it occupies
        n = 0
        for ch in chunk:
            n += 1
            if not (ord(ch) & 0x80):
                break
        return n

    # drop the source-size and target-size varints; only the opcode
    # stream that follows is interpreted here
    binchunk = binchunk[skipheader(binchunk):]
    binchunk = binchunk[skipheader(binchunk):]

    pieces = []
    i, end = 0, len(binchunk)
    while i < end:
        cmd = ord(binchunk[i])
        i += 1
        if cmd & 0x80:
            # copy instruction: low bits select which offset/size bytes
            # follow (little-endian)
            offset = 0
            size = 0
            for bit, shift in ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)):
                if cmd & bit:
                    offset |= ord(binchunk[i]) << shift
                    i += 1
            for bit, shift in ((0x10, 0), (0x20, 8), (0x40, 16)):
                if cmd & bit:
                    size |= ord(binchunk[i]) << shift
                    i += 1
            if size == 0:
                # an all-zero size encodes 64KiB, per patch-delta.c
                size = 0x10000
            pieces.append(data[offset:offset + size])
        elif cmd != 0:
            # literal instruction: insert the next 'cmd' bytes verbatim
            pieces.append(binchunk[i:i + cmd])
            i += cmd
        else:
            raise PatchError(_('unexpected delta opcode 0'))
    return ''.join(pieces)
1924 1926
def applydiff(ui, fp, backend, store, strip=1, prefix='', eolmode='strict'):
    """Reads a patch from fp and tries to apply it.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    If 'eolmode' is 'strict', the patch content and patched file are
    read in binary mode. Otherwise, line endings are ignored when
    patching then normalized according to 'eolmode'.
    """
    # delegate to the generic driver, using the regular 'patchfile' class
    # as the per-file patcher
    result = _applydiff(ui, fp, patchfile, backend, store, strip=strip,
                        prefix=prefix, eolmode=eolmode)
    return result
1937 1939
def _applydiff(ui, fp, patcher, backend, store, strip=1, prefix='',
               eolmode='strict'):
    # Generic patch-application driver: parses events from iterhunks(fp)
    # and applies them through 'backend', creating one 'patcher' instance
    # per target file.  Returns -1 if any hunks were rejected, 1 if any
    # applied with fuzz, 0 on a clean apply.

    if prefix:
        prefix = pathutil.canonpath(backend.repo.root, backend.repo.getcwd(),
                                    prefix)
        if prefix != '':
            prefix += '/'
    def pstrip(p):
        # strip-1 because git paths carry the extra a/ b/ component
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == 'hunk':
            if not current_file:
                # file selection failed earlier; skip its hunks
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == 'file':
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   prefix)
            if gp.op == 'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # metadata-only entry: deletion, copy/rename or mode change
                if gp.op == 'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in ('RENAME', 'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchError(_("source file '%s' does not exist")
                                         % gp.oldpath)
                if gp.mode:
                    mode = gp.mode
                    if gp.op == 'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = ''
                if data or mode:
                    if (gp.op in ('ADD', 'RENAME', 'COPY')
                        and backend.exists(gp.path)):
                        raise PatchError(_("cannot create %s: destination "
                                           "already exists") % gp.path)
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store,
                                       eolmode=eolmode)
            except PatchError as inst:
                # could not read/select the target: count as a reject and
                # let the hunks for this file be skipped above
                ui.warn(str(inst) + '\n')
                current_file = None
                rejects += 1
                continue
        elif state == 'git':
            # stash pre-change copies of copy/rename sources so later
            # entries can read the original contents
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_('unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
2027 2029
def _externalpatch(ui, repo, patcher, patchname, strip, files,
                   similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor."""

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append('-d %s' % util.shellquote(cwd))
    fp = util.popen('%s %s -p%d < %s' % (patcher, ' '.join(args), strip,
                                         util.shellquote(patchname)))
    # Initialize before the loop: some patchers may emit a 'with fuzz' or
    # 'FAILED' line before any 'patching file' line, which would otherwise
    # hit a NameError on pf/printed_file below.
    pf = ''
    printed_file = False
    try:
        for line in util.iterfile(fp):
            line = line.rstrip()
            ui.note(line + '\n')
            if line.startswith('patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif line.find('with fuzz') >= 0:
                fuzz = True
                if not printed_file:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
            elif line.find('saving rejects to file') >= 0:
                ui.warn(line + '\n')
            elif line.find('FAILED') >= 0:
                if not printed_file:
                    ui.warn(pf + '\n')
                    printed_file = True
                ui.warn(line + '\n')
    finally:
        if files:
            scmutil.marktouched(repo, files, similarity)
    # close() returns the exit status of the external command
    code = fp.close()
    if code:
        raise PatchError(_("patch command failed: %s") %
                         util.explainexit(code)[0])
    return fuzz
2069 2071
def patchbackend(ui, backend, patchobj, strip, prefix, files=None,
                 eolmode='strict'):
    """Apply 'patchobj' (a file name or an open file object) via 'backend'.

    'files' (a set, if given) is updated with the paths touched.  Returns
    True when the patch applied with fuzz, False on a clean apply, and
    raises PatchError when hunks were rejected.
    """
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config('patch', 'eol', 'strict')
    if eolmode.lower() not in eolmodes:
        raise error.Abort(_('unsupported line endings type: %s') % eolmode)
    eolmode = eolmode.lower()

    store = filestore()
    try:
        # patchobj may be a path or an already-open file object
        fp = open(patchobj, 'rb')
    except TypeError:
        fp = patchobj
    try:
        ret = applydiff(ui, fp, backend, store, strip=strip, prefix=prefix,
                        eolmode=eolmode)
    finally:
        # only close what we opened ourselves
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if ret < 0:
        raise PatchError(_('patch failed to apply'))
    return ret > 0
2096 2098
def internalpatch(ui, repo, patchobj, strip, prefix='', files=None,
                  eolmode='strict', similarity=0):
    """Apply <patchobj> to the working directory with the builtin patcher.

    Returns whether the patch was applied with fuzz factor.
    """
    wbackend = workingbackend(ui, repo, similarity)
    return patchbackend(ui, wbackend, patchobj, strip, prefix, files,
                        eolmode)
2103 2105
def patchrepo(ui, repo, ctx, store, patchobj, strip, prefix, files=None,
              eolmode='strict'):
    """Apply <patchobj> on top of <ctx>, writing results into <store>."""
    rbackend = repobackend(ui, repo, ctx, store)
    return patchbackend(ui, rbackend, patchobj, strip, prefix, files,
                        eolmode)
2108 2110
def patch(ui, repo, patchname, strip=1, prefix='', files=None, eolmode='strict',
          similarity=0):
    """Apply <patchname> to the working directory.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    if files is None:
        files = set()
    # an 'ui.patch' config setting selects an external patch program
    externalcmd = ui.config('ui', 'patch')
    if externalcmd:
        return _externalpatch(ui, repo, externalcmd, patchname, strip,
                              files, similarity)
    else:
        return internalpatch(ui, repo, patchname, strip, prefix, files,
                             eolmode, similarity)
2130 2132
def changedfiles(ui, repo, patchpath, strip=1):
    """Return the set of repository paths touched by the patch at patchpath."""
    backend = fsbackend(ui, repo.root)
    changed = set()
    with open(patchpath, 'rb') as fp:
        for state, values in iterhunks(fp):
            if state in ('hunk', 'git'):
                # hunk contents and git metadata carry no new file names
                continue
            if state != 'file':
                raise error.Abort(_('unsupported parser state: %s') % state)
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pathtransform(gp.path, strip - 1, '')[1]
                if gp.oldpath:
                    gp.oldpath = pathtransform(gp.oldpath, strip - 1, '')[1]
            else:
                gp = makepatchmeta(backend, afile, bfile, first_hunk, strip,
                                   '')
            changed.add(gp.path)
            if gp.op == 'RENAME':
                changed.add(gp.oldpath)
    return changed
2151 2153
class GitDiffRequired(Exception):
    """Raised to signal that a change can only be represented as a git diff."""
    pass
2154 2156
def diffallopts(ui, opts=None, untrusted=False, section='diff'):
    '''return diffopts with all features supported and parsed'''
    allfeatures = dict(git=True, whitespace=True, formatchanging=True)
    return difffeatureopts(ui, opts=opts, untrusted=untrusted,
                           section=section, **allfeatures)
2159 2161
# backwards-compatibility alias -- presumably external callers still use
# the shorter name; TODO confirm before removing
diffopts = diffallopts
2161 2163
def difffeatureopts(ui, opts=None, untrusted=False, section='diff', git=False,
                    whitespace=False, formatchanging=False):
    '''return diffopts with only opted-in features parsed

    Features:
    - git: git-style diffs
    - whitespace: whitespace options like ignoreblanklines and ignorews
    - formatchanging: options that will likely break or cause correctness issues
      with most diff parsers
    '''
    def get(key, name=None, getter=ui.configbool, forceplain=None):
        # resolve one option: command-line value wins, then HGPLAIN
        # override (if any), then the config file
        if opts:
            v = opts.get(key)
            # diffopts flags are either None-default (which is passed
            # through unchanged, so we can identify unset values), or
            # some other falsey default (eg --unified, which defaults
            # to an empty string). We only want to override the config
            # entries from hgrc with command line values if they
            # appear to have been set, which is any truthy value,
            # True, or False.
            if v or isinstance(v, bool):
                return v
        if forceplain is not None and ui.plain():
            return forceplain
        return getter(section, name or key, None, untrusted=untrusted)

    # core options, expected to be understood by every diff parser
    buildopts = {
        'nodates': get('nodates'),
        'showfunc': get('show_function', 'showfunc'),
        'context': get('unified', getter=ui.config),
    }

    if git:
        buildopts['git'] = get('git')

        # since this is in the experimental section, we need to call
        # ui.configbool directly
        buildopts['showsimilarity'] = ui.configbool('experimental',
                                                    'extendedheader.similarity')

        # need to inspect the ui object instead of using get() since we want to
        # test for an int
        hconf = ui.config('experimental', 'extendedheader.index')
        if hconf is not None:
            hlen = None
            try:
                # the hash config could be an integer (for length of hash) or a
                # word (e.g. short, full, none)
                hlen = int(hconf)
                if hlen < 0 or hlen > 40:
                    msg = _("invalid length for extendedheader.index: '%d'\n")
                    ui.warn(msg % hlen)
            except ValueError:
                # default value
                if hconf == 'short' or hconf == '':
                    hlen = 12
                elif hconf == 'full':
                    hlen = 40
                elif hconf != 'none':
                    msg = _("invalid value for extendedheader.index: '%s'\n")
                    ui.warn(msg % hconf)
            finally:
                # hlen stays None for unrecognized words, disabling the index
                buildopts['index'] = hlen

    if whitespace:
        buildopts['ignorews'] = get('ignore_all_space', 'ignorews')
        buildopts['ignorewsamount'] = get('ignore_space_change',
                                          'ignorewsamount')
        buildopts['ignoreblanklines'] = get('ignore_blank_lines',
                                            'ignoreblanklines')
    if formatchanging:
        buildopts['text'] = opts and opts.get('text')
        binary = None if opts is None else opts.get('binary')
        buildopts['nobinary'] = (not binary if binary is not None
                                 else get('nobinary', forceplain=False))
        buildopts['noprefix'] = get('noprefix', forceplain=False)

    return mdiff.diffopts(**pycompat.strkwargs(buildopts))
2241 2243
def diff(repo, node1=None, node2=None, match=None, changes=None,
         opts=None, losedatafn=None, prefix='', relroot='', copy=None):
    '''yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    prefix is a filename prefix that is prepended to all filenames on
    display (used for subrepos).

    relroot, if not empty, must be normalized with a trailing /. Any match
    patterns that fall outside it will be ignored.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.'''
    for hdr, hunkiter in diffhunks(repo, node1=node1, node2=node2,
                                   match=match, changes=changes, opts=opts,
                                   losedatafn=losedatafn, prefix=prefix,
                                   relroot=relroot, copy=copy):
        # flatten all hunk lines of this file into a single text chunk
        lines = []
        for hunkrange, hunklines in hunkiter:
            lines.extend(hunklines)
        text = ''.join(lines)
        # a multi-line header (e.g. git metadata) is emitted even when the
        # file produced no textual hunks
        if hdr and (text or len(hdr) > 1):
            yield '\n'.join(hdr) + '\n'
        if text:
            yield text
2275 2277
def diffhunks(repo, node1=None, node2=None, match=None, changes=None,
              opts=None, losedatafn=None, prefix='', relroot='', copy=None):
    """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
    where `header` is a list of diff headers and `hunks` is an iterable of
    (`hunkrange`, `hunklines`) tuples.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    def lrugetfilectx():
        # small LRU cache (20 entries) of filelogs, keyed by file name
        cache = {}
        order = collections.deque()
        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                order.remove(f)
            order.append(f)
            return fctx
        return getfilectx
    getfilectx = lrugetfilectx()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    relfiltered = False
    if relroot != '' and match.always():
        # as a special case, create a new matcher with just the relroot
        pats = [relroot]
        match = scmutil.match(ctx2, pats, default='path')
        relfiltered = True

    if not changes:
        changes = repo.status(ctx1, ctx2, match=match)
    modified, added, removed = changes[:3]

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
    if opts.git or opts.upgrade:
        copy = copies.pathcopies(ctx1, ctx2, match=match)

    if relroot is not None:
        if not relfiltered:
            # XXX this would ideally be done in the matcher, but that is
            # generally meant to 'or' patterns, not 'and' them. In this case we
            # need to 'and' all the patterns from the matcher with relroot.
            def filterrel(l):
                return [f for f in l if f.startswith(relroot)]
            modified = filterrel(modified)
            added = filterrel(added)
            removed = filterrel(removed)
            relfiltered = True
        # filter out copies where either side isn't inside the relative root
        copy = dict(((dst, src) for (dst, src) in copy.iteritems()
                     if dst.startswith(relroot)
                     and src.startswith(relroot)))

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so We don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    # NOTE: deleting from 'copy' while iterating items() relies on the
    # Python 2 list-returning items(); would need a copy under Python 3
    for dst, src in copy.items():
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    def difffn(opts, losedata):
        return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
                       copy, getfilectx, opts, losedata, prefix, relroot)
    if opts.upgrade and not opts.git:
        try:
            def losedata(fn):
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired
            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            # retry from scratch in git format
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)
2388 2390
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()

    func is expected to yield chunks of diff text (e.g. diff() or trydiff
    output). Each chunk is split into lines; every line is classified as
    either a file header line (labeled via headprefixes) or a hunk/text
    line (labeled via textprefixes). Lines matching no prefix get an empty
    label. Newlines between lines are yielded separately with an empty
    label so callers can colorize line content only.
    '''
    # prefix -> label tables; first match wins (checked in order)
    headprefixes = [('diff', 'diff.diffline'),
                    ('copy', 'diff.extended'),
                    ('rename', 'diff.extended'),
                    ('old', 'diff.extended'),
                    ('new', 'diff.extended'),
                    ('deleted', 'diff.extended'),
                    ('index', 'diff.extended'),
                    ('similarity', 'diff.extended'),
                    ('---', 'diff.file_a'),
                    ('+++', 'diff.file_b')]
    textprefixes = [('@', 'diff.hunk'),
                    ('-', 'diff.deleted'),
                    ('+', 'diff.inserted')]
    # 'head' tracks whether we are inside a file header section: it turns on
    # when a line starts with something other than ' +-@\' and turns off at
    # the first hunk ('@') line.
    head = False
    for chunk in func(*args, **kw):
        lines = chunk.split('\n')
        for i, line in enumerate(lines):
            if i != 0:
                # re-emit the newline stripped by split(), unlabeled
                yield ('\n', '')
            if head:
                if line.startswith('@'):
                    head = False
            else:
                if line and line[0] not in ' +-@\\':
                    head = True
            stripline = line
            diffline = False
            if not head and line and line[0] in '+-':
                # highlight tabs and trailing whitespace, but only in
                # changed lines
                stripline = line.rstrip()
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            for prefix, label in prefixes:
                if stripline.startswith(prefix):
                    if diffline:
                        # split changed lines on tabs so tabs can carry
                        # their own 'diff.tab' label
                        for token in tabsplitter.findall(stripline):
                            if '\t' == token[0]:
                                yield (token, 'diff.tab')
                            else:
                                yield (token, label)
                    else:
                        yield (stripline, label)
                    break
            else:
                # no prefix matched: plain, unlabeled output
                yield (line, '')
            if line != stripline:
                # the rstrip()ed part of a changed line is trailing whitespace
                yield (line[len(stripline):], 'diff.trailingwhitespace')
2442 2444
def diffui(*args, **kw):
    '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
    labeled = difflabel(diff, *args, **kw)
    return labeled
2446 2448
2447 2449 def _filepairs(modified, added, removed, copy, opts):
2448 2450 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2449 2451 before and f2 is the the name after. For added files, f1 will be None,
2450 2452 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2451 2453 or 'rename' (the latter two only if opts.git is set).'''
2452 2454 gone = set()
2453 2455
2454 2456 copyto = dict([(v, k) for k, v in copy.items()])
2455 2457
2456 2458 addedset, removedset = set(added), set(removed)
2457 2459
2458 2460 for f in sorted(modified + added + removed):
2459 2461 copyop = None
2460 2462 f1, f2 = f, f
2461 2463 if f in addedset:
2462 2464 f1 = None
2463 2465 if f in copy:
2464 2466 if opts.git:
2465 2467 f1 = copy[f]
2466 2468 if f1 in removedset and f1 not in gone:
2467 2469 copyop = 'rename'
2468 2470 gone.add(f1)
2469 2471 else:
2470 2472 copyop = 'copy'
2471 2473 elif f in removedset:
2472 2474 f2 = None
2473 2475 if opts.git:
2474 2476 # have we already reported a copy above?
2475 2477 if (f in copyto and copyto[f] in addedset
2476 2478 and copy[copyto[f]] == f):
2477 2479 continue
2478 2480 yield f1, f2, copyop
2479 2481
def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
            copy, getfilectx, opts, losedatafn, prefix, relroot):
    '''given input data, generate a diff and yield it in blocks

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    relroot is removed and prefix is added to every path in the diff output.

    If relroot is not empty, this function expects every path in modified,
    added, removed and copy to start with it.

    Yields (header, hunks) pairs, one per file pair produced by _filepairs.
    '''

    def gitindex(text):
        # SHA-1 of the content in git blob form, as used by "index" lines
        if not text:
            text = ""
        l = len(text)
        s = hashlib.sha1('blob %d\0' % l)
        s.update(text)
        return s.hexdigest()

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    def diffline(f, revs):
        # plain (non-git) "diff -r REV1 -r REV2 file" banner
        revinfo = ' '.join(["-r %s" % rev for rev in revs])
        return 'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = util.datestr(ctx1.date())
    date2 = util.datestr(ctx2.date())

    # file flag -> git file mode
    gitmode = {'l': '120000', 'x': '100755', '': '100644'}

    # developer sanity check: every incoming path must live under relroot
    if relroot != '' and (repo.ui.configbool('devel', 'all')
                          or repo.ui.configbool('devel', 'check-relroot')):
        for f in modified + added + removed + copy.keys() + copy.values():
            if f is not None and not f.startswith(relroot):
                raise AssertionError(
                    "file %s doesn't start with relroot %s" % (f, relroot))

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        binary = not opts.text and any(f.isbinary()
                                       for f in [fctx1, fctx2] if f is not None)

        # a plain (non-git) diff cannot represent these cases; give the
        # caller a chance to upgrade to git format via losedatafn
        if losedatafn and not opts.git:
            if (binary or
                # copy/rename
                f2 in copy or
                # empty file creation
                (not f1 and isempty(fctx2)) or
                # empty file deletion
                (isempty(fctx1) and not f2) or
                # create with flags
                (not f1 and flag2) or
                # change flags
                (f1 and f2 and flag1 != flag2)):
                losedatafn(f2 or f1)

        # output paths: strip relroot, then prepend prefix
        path1 = f1 or f2
        path2 = f2 or f1
        path1 = posixpath.join(prefix, path1[len(relroot):])
        path2 = posixpath.join(prefix, path2[len(relroot):])
        header = []
        if opts.git:
            header.append('diff --git %s%s %s%s' %
                          (aprefix, path1, bprefix, path2))
            if not f1: # added
                header.append('new file mode %s' % gitmode[flag2])
            elif not f2: # removed
                header.append('deleted file mode %s' % gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = gitmode[flag1], gitmode[flag2]
                if mode1 != mode2:
                    header.append('old mode %s' % mode1)
                    header.append('new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        sim = similar.score(ctx1[path1], ctx2[path2]) * 100
                        header.append('similarity index %d%%' % sim)
                    header.append('%s from %s' % (copyop, path1))
                    header.append('%s to %s' % (copyop, path2))
        elif revs and not repo.ui.quiet:
            header.append(diffline(path1, revs))

        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | outputted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
        if binary and (not opts.git or (opts.git and opts.nobinary and not
                                        opts.index)):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0' # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        if binary and opts.git and not opts.nobinary:
            text = mdiff.b85diff(content1, content2)
            if text:
                header.append('index %s..%s' %
                              (gitindex(content1), gitindex(content2)))
            hunks = (None, [text]),
        else:
            if opts.git and opts.index > 0:
                # truncated blob hashes, as produced by diff --git with
                # a configured index length
                flag = flag1
                if flag is None:
                    flag = flag2
                header.append('index %s..%s %s' %
                              (gitindex(content1)[0:opts.index],
                               gitindex(content2)[0:opts.index],
                               gitmode[flag]))

            uheaders, hunks = mdiff.unidiff(content1, date1,
                                            content2, date2,
                                            path1, path2, opts=opts)
            header.extend(uheaders)
        yield header, hunks
2635 2637
def diffstatsum(stats):
    '''aggregate (filename, adds, removes, isbinary) diffstat entries

    Returns (maxfile, maxtotal, addtotal, removetotal, binary): the widest
    filename (display columns), the largest per-file change count, the
    totals of additions and removals, and whether any entry was binary.
    '''
    widest = 0
    biggest = 0
    insertions = 0
    deletions = 0
    anybinary = False
    for filename, adds, removes, isbinary in stats:
        widest = max(widest, encoding.colwidth(filename))
        biggest = max(biggest, adds + removes)
        insertions += adds
        deletions += removes
        if isbinary:
            anybinary = True

    return widest, biggest, insertions, deletions, anybinary
2646 2648
def diffstatdata(lines):
    '''parse diff output lines into (filename, adds, removes, isbinary) tuples

    Accepts both git-style ("diff --git a/...") and plain ("diff -r ...")
    file banners. Lines between the banner and the first "@@" hunk are
    header lines and are not counted, so "---"/"+++" lines do not inflate
    the removal/addition counts.
    '''
    # NOTE: raw string literal -- '\s' in a non-raw string is an invalid
    # escape sequence (DeprecationWarning since Python 3.6, an error in
    # later versions).
    diffre = re.compile(r'^diff .*-r [a-z0-9]+\s(.*)$')

    results = []
    filename, adds, removes, isbinary = None, 0, 0, False

    def addresult():
        # flush the counters for the current file, if any
        if filename:
            results.append((filename, adds, removes, isbinary))

    # inheader is used to track if a line is in the
    # header portion of the diff. This helps properly account
    # for lines that start with '--' or '++'
    inheader = False

    for line in lines:
        if line.startswith('diff'):
            addresult()
            # starting a new file diff
            # set numbers to 0 and reset inheader
            inheader = True
            adds, removes, isbinary = 0, 0, False
            if line.startswith('diff --git a/'):
                filename = gitre.search(line).group(2)
            elif line.startswith('diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith('@@'):
            inheader = False
        elif line.startswith('+') and not inheader:
            adds += 1
        elif line.startswith('-') and not inheader:
            removes += 1
        elif (line.startswith('GIT binary patch') or
              line.startswith('Binary file')):
            isbinary = True
    # flush the last file
    addresult()
    return results
2685 2687
def diffstat(lines, width=80):
    '''render a textual diffstat table from diff output lines

    Each file gets a row " name | count +++---"; a summary line follows.
    width bounds the total line width; the histogram is scaled to fit.
    '''
    entries = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(entries)

    countwidth = len(str(maxtotal))
    if hasbinary:
        # leave room for the literal 'Bin' marker
        countwidth = max(countwidth, 3)
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(value):
        if maxtotal <= graphwidth:
            return value
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(value * graphwidth // maxtotal, int(bool(value)))

    rows = []
    for name, adds, removes, isbinary in entries:
        changecount = 'Bin' if isbinary else adds + removes
        padding = ' ' * (maxname - encoding.colwidth(name))
        histogram = '+' * scale(adds) + '-' * scale(removes)
        rows.append(' %s%s | %*s %s\n' %
                    (name, padding, countwidth, changecount, histogram))

    if entries:
        rows.append(_(' %d files changed, %d insertions(+), '
                      '%d deletions(-)\n')
                    % (len(entries), totaladds, totalremoves))

    return ''.join(rows)
2723 2725
def diffstatui(*args, **kw):
    '''like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()
    '''

    for row in diffstat(*args, **kw).splitlines():
        if not row or row[-1] not in '+-':
            # summary line or binary entry: emit unlabeled
            yield (row, '')
        else:
            name, histogram = row.rsplit(' ', 1)
            yield (name + ' ', '')
            inserted = re.search(r'\++', histogram)
            if inserted:
                yield (inserted.group(0), 'diffstat.inserted')
            deleted = re.search(r'-+', histogram)
            if deleted:
                yield (deleted.group(0), 'diffstat.deleted')
        yield ('\n', '')
@@ -1,802 +1,802
1 1 #
2 2 # This is the mercurial setup script.
3 3 #
4 4 # 'python setup.py install', or
5 5 # 'python setup.py --help' for more options
6 6
import sys, platform
if sys.version_info < (2, 7, 0, 'final'):
    raise SystemExit('Mercurial requires Python 2.7 or later.')

if sys.version_info[0] >= 3:
    # 'print' is a keyword on Python 2, so this file cannot reference the
    # builtin directly; grab it via eval() to get a callable on Python 3.
    printf = eval('print')
    libdir_escape = 'unicode_escape'
else:
    libdir_escape = 'string_escape'
    # Python 2 substitute for the print function; accepts the same
    # 'file' and 'end' keyword arguments used below.
    def printf(*args, **kwargs):
        f = kwargs.get('file', sys.stdout)
        end = kwargs.get('end', '\n')
        f.write(b' '.join(args) + end)
20 20
# Sanity-check that this Python installation ships the standard modules
# Mercurial needs; some vendor packagings omit them.

# Solaris Python packaging brain damage
try:
    import hashlib
    sha = hashlib.sha1()
except ImportError:
    try:
        import sha
        sha.sha # silence unused import warning
    except ImportError:
        raise SystemExit(
            "Couldn't import standard hashlib (incomplete Python install).")

try:
    import zlib
    zlib.compressobj # silence unused import warning
except ImportError:
    raise SystemExit(
        "Couldn't import standard zlib (incomplete Python install).")

# The base IronPython distribution (as of 2.7.1) doesn't support bz2
isironpython = False
try:
    isironpython = (platform.python_implementation()
                    .lower().find("ironpython") != -1)
except AttributeError:
    pass

if isironpython:
    # warn but do not abort: bz2 is unavailable on IronPython
    sys.stderr.write("warning: IronPython detected (no bz2 support)\n")
else:
    try:
        import bz2
        bz2.BZ2Compressor # silence unused import warning
    except ImportError:
        raise SystemExit(
            "Couldn't import standard bz2 (incomplete Python install).")

# PyPy detection; used later to default the cffi build path
ispypy = "PyPy" in sys.version
59 59
60 60 import ctypes
61 61 import os, stat, subprocess, time
62 62 import re
63 63 import shutil
64 64 import tempfile
65 65 from distutils import log
66 66 # We have issues with setuptools on some platforms and builders. Until
67 67 # those are resolved, setuptools is opt-in except for platforms where
68 68 # we don't have issues.
69 69 if os.name == 'nt' or 'FORCE_SETUPTOOLS' in os.environ:
70 70 from setuptools import setup
71 71 else:
72 72 from distutils.core import setup
73 73 from distutils.ccompiler import new_compiler
74 74 from distutils.core import Command, Extension
75 75 from distutils.dist import Distribution
76 76 from distutils.command.build import build
77 77 from distutils.command.build_ext import build_ext
78 78 from distutils.command.build_py import build_py
79 79 from distutils.command.build_scripts import build_scripts
80 80 from distutils.command.install_lib import install_lib
81 81 from distutils.command.install_scripts import install_scripts
82 82 from distutils.spawn import spawn, find_executable
83 83 from distutils import file_util
84 84 from distutils.errors import (
85 85 CCompilerError,
86 86 DistutilsError,
87 87 DistutilsExecError,
88 88 )
89 89 from distutils.sysconfig import get_python_inc, get_config_var
90 90 from distutils.version import StrictVersion
91 91
# Scripts installed into the user's PATH.
scripts = ['hg']
if os.name == 'nt':
    # We remove hg.bat if we are able to build hg.exe.
    scripts.append('contrib/win32/hg.bat')
96 96
def cancompile(cc, code):
    """Return True if compiler *cc* can compile and link the C source *code*.

    The probe source is written to a temporary directory which is removed
    afterwards; compiler diagnostics are suppressed by redirecting this
    process's stderr fd to /dev/null for the duration of the attempt.
    """
    tmpdir = tempfile.mkdtemp(prefix='hg-install-')
    devnull = oldstderr = None
    try:
        fname = os.path.join(tmpdir, 'testcomp.c')
        f = open(fname, 'w')
        f.write(code)
        f.close()
        # Redirect stderr to /dev/null to hide any error messages
        # from the compiler.
        # This will have to be changed if we ever have to check
        # for a function on Windows.
        devnull = open('/dev/null', 'w')
        oldstderr = os.dup(sys.stderr.fileno())
        os.dup2(devnull.fileno(), sys.stderr.fileno())
        objects = cc.compile([fname], output_dir=tmpdir)
        cc.link_executable(objects, os.path.join(tmpdir, "a.out"))
        return True
    except Exception:
        # any compile/link failure means "no"
        return False
    finally:
        # restore stderr before cleaning up, in reverse order of setup
        if oldstderr is not None:
            os.dup2(oldstderr, sys.stderr.fileno())
        if devnull is not None:
            devnull.close()
        shutil.rmtree(tmpdir)
123 123
# simplified version of distutils.ccompiler.CCompiler.has_function
# that actually removes its temporary files.
def hasfunction(cc, funcname):
    """Return True if a call to *funcname* compiles and links with *cc*."""
    stub = 'int main(void) { %s(); }\n' % funcname
    return cancompile(cc, stub)
129 129
def hasheader(cc, headername):
    """Return True if including *headername* compiles and links with *cc*."""
    stub = '#include <%s>\nint main(void) { return 0; }\n' % headername
    return cancompile(cc, stub)
133 133
# py2exe needs to be installed to work
try:
    import py2exe
    py2exe.Distribution # silence unused import warning
    py2exeloaded = True
    # import py2exe's patched Distribution class
    from distutils.core import Distribution
except ImportError:
    # py2exe is optional; remember that it is unavailable
    py2exeloaded = False
143 143
def runcmd(cmd, env):
    """Run *cmd* (an argv list) with environment *env*.

    Returns a (stdout, stderr) pair of captured output.
    """
    oldplan9 = (sys.platform == 'plan9'
                and sys.version_info[0] == 2 and sys.version_info[1] < 7)
    if oldplan9:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        _, out, err = os.popen3(cmd)
        return str(out), str(err)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=env)
    out, err = proc.communicate()
    return out, err
156 156
def runhg(cmd, env):
    """Run an hg command via runcmd(), returning its stdout.

    Known-benign stderr noise is filtered out; if anything else remains on
    stderr, it is echoed to our stderr and '' is returned instead of the
    command's output.
    """
    out, err = runcmd(cmd, env)
    # If root is executing setup.py, but the repository is owned by
    # another user (as in "sudo python setup.py install") we will get
    # trust warnings since the .hg/hgrc file is untrusted. That is
    # fine, we don't want to load it anyway. Python may warn about
    # a missing __init__.py in mercurial/locale, we also ignore that.
    err = [e for e in err.splitlines()
           if not e.startswith(b'not trusting file') \
              and not e.startswith(b'warning: Not importing') \
              and not e.startswith(b'obsolete feature not enabled')]
    if err:
        # unexpected stderr output: surface it and signal failure with ''
        printf("stderr from '%s':" % (' '.join(cmd)), file=sys.stderr)
        printf(b'\n'.join([b' ' + e for e in err]), file=sys.stderr)
        return ''
    return out
173 173
# Determine the Mercurial version string, preferring repository metadata
# (tags) when building from a checkout, falling back to .hg_archival.txt
# for tarball builds, and finally to the recorded __version__ module.
version = ''

# Execute hg out of this directory with a custom environment which takes care
# to not use any hgrc files and do no localization.
env = {'HGMODULEPOLICY': 'py',
       'HGRCPATH': '',
       'LANGUAGE': 'C',
       'PATH': ''} # make pypi modules that use os.environ['PATH'] happy
if 'LD_LIBRARY_PATH' in os.environ:
    env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']
if 'SystemRoot' in os.environ:
    # Copy SystemRoot into the custom environment for Python 2.6
    # under Windows. Otherwise, the subprocess will fail with
    # error 0xc0150004. See: http://bugs.python.org/issue3440
    env['SystemRoot'] = os.environ['SystemRoot']

if os.path.isdir('.hg'):
    # building from a working copy: derive the version from hg itself
    cmd = [sys.executable, 'hg', 'log', '-r', '.', '--template', '{tags}\n']
    numerictags = [t for t in runhg(cmd, env).split() if t[0].isdigit()]
    hgid = runhg([sys.executable, 'hg', 'id', '-i'], env).strip()
    if numerictags: # tag(s) found
        version = numerictags[-1]
        if hgid.endswith('+'): # propagate the dirty status to the tag
            version += '+'
    else: # no tag found
        ltagcmd = [sys.executable, 'hg', 'parents', '--template',
                   '{latesttag}']
        ltag = runhg(ltagcmd, env)
        changessincecmd = [sys.executable, 'hg', 'log', '-T', 'x\n', '-r',
                           "only(.,'%s')" % ltag]
        changessince = len(runhg(changessincecmd, env).splitlines())
        version = '%s+%s-%s' % (ltag, changessince, hgid)
    if version.endswith('+'):
        # dirty working copy: append the build date
        version += time.strftime('%Y%m%d')
elif os.path.exists('.hg_archival.txt'):
    # building from an 'hg archive' tarball: parse its key: value metadata
    kw = dict([[t.strip() for t in l.split(':', 1)]
               for l in open('.hg_archival.txt')])
    if 'tag' in kw:
        version = kw['tag']
    elif 'latesttag' in kw:
        if 'changessincelatesttag' in kw:
            version = '%(latesttag)s+%(changessincelatesttag)s-%(node).12s' % kw
        else:
            version = '%(latesttag)s+%(latesttagdistance)s-%(node).12s' % kw
    else:
        version = kw.get('node', '')[:12]

if version:
    # record the detected version for the installed package
    with open("mercurial/__version__.py", "w") as f:
        f.write('# this file is autogenerated by setup.py\n')
        f.write('version = "%s"\n' % version)

try:
    # import the just-written version module under the pure-Python module
    # policy, restoring the caller's HGMODULEPOLICY afterwards
    oldpolicy = os.environ.get('HGMODULEPOLICY', None)
    os.environ['HGMODULEPOLICY'] = 'py'
    from mercurial import __version__
    version = __version__.version
except ImportError:
    version = 'unknown'
finally:
    if oldpolicy is None:
        del os.environ['HGMODULEPOLICY']
    else:
        os.environ['HGMODULEPOLICY'] = oldpolicy
238 238
class hgbuild(build):
    """build command that also builds translations first."""
    # Insert hgbuildmo first so that files in mercurial/locale/ are found
    # when build_py is run next.
    sub_commands = [('build_mo', None)] + build.sub_commands
243 243
class hgbuildmo(build):
    """Compile i18n/*.po translation sources into mercurial/locale .mo files."""

    description = "build translations (.mo files)"

    def run(self):
        # msgfmt (gettext) is required; skip with a warning if missing
        if not find_executable('msgfmt'):
            self.warn("could not find msgfmt executable, no translations "
                      "will be built")
            return

        podir = 'i18n'
        if not os.path.isdir(podir):
            self.warn("could not find %s/ directory" % podir)
            return

        join = os.path.join
        for po in os.listdir(podir):
            if not po.endswith('.po'):
                continue
            pofile = join(podir, po)
            # e.g. i18n/da.po -> mercurial/locale/da/LC_MESSAGES/hg.mo
            modir = join('locale', po[:-3], 'LC_MESSAGES')
            mofile = join(modir, 'hg.mo')
            mobuildfile = join('mercurial', mofile)
            cmd = ['msgfmt', '-v', '-o', mobuildfile, pofile]
            if sys.platform != 'sunos5':
                # msgfmt on Solaris does not know about -c
                cmd.append('-c')
            self.mkpath(join('mercurial', modir))
            # make_file skips the rebuild when the .mo is newer than the .po
            self.make_file([pofile], mobuildfile, spawn, (cmd,))
273 273
274 274
class hgdist(Distribution):
    """Distribution with a --pure flag and cffi support (default on PyPy)."""
    # when True, skip building C extensions entirely
    pure = False
    # build cffi-based extensions instead of CPython C extensions
    cffi = ispypy

    global_options = Distribution.global_options + \
                     [('pure', None, "use pure (slow) Python "
                                     "code instead of C extensions"),
                     ]

    def has_ext_modules(self):
        # self.ext_modules is emptied in hgbuildpy.finalize_options which is
        # too late for some cases
        return not self.pure and Distribution.has_ext_modules(self)
288 288
# This is ugly as a one-liner. So use a variable.
# Extend build_ext's negative-option map so that --no-zstd clears the
# 'zstd' boolean option defined on hgbuildext below.
buildextnegops = dict(getattr(build_ext, 'negative_options', {}))
buildextnegops['no-zstd'] = 'zstd'
292 292
class hgbuildext(build_ext):
    """build_ext with a --[no-]zstd switch and tolerant optional extensions."""
    user_options = build_ext.user_options + [
        ('zstd', None, 'compile zstd bindings [default]'),
        ('no-zstd', None, 'do not compile zstd bindings'),
    ]

    boolean_options = build_ext.boolean_options + ['zstd']
    negative_opt = buildextnegops

    def initialize_options(self):
        # zstd bindings are built unless --no-zstd is given
        self.zstd = True
        return build_ext.initialize_options(self)

    def build_extensions(self):
        # Filter out zstd if disabled via argument.
        if not self.zstd:
            self.extensions = [e for e in self.extensions
                               if e.name != 'mercurial.zstd']

        return build_ext.build_extensions(self)

    def build_extension(self, ext):
        try:
            build_ext.build_extension(self, ext)
        except CCompilerError:
            # extensions marked 'optional' may fail to build without
            # aborting the whole install
            if not getattr(ext, 'optional', False):
                raise
            log.warn("Failed to build optional extension '%s' (skipping)",
                     ext.name)
322 322
class hgbuildscripts(build_scripts):
    """build_scripts that, on Windows, builds and ships hg.exe when possible."""
    def run(self):
        # only the Windows non-pure build needs the exe wrapper
        if os.name != 'nt' or self.distribution.pure:
            return build_scripts.run(self)

        exebuilt = False
        try:
            self.run_command('build_hgexe')
            exebuilt = True
        except (DistutilsError, CCompilerError):
            # hg.exe is optional; fall back to installing hg.bat
            log.warn('failed to build optional hg.exe')

        if exebuilt:
            # Copying hg.exe to the scripts build directory ensures it is
            # installed by the install_scripts command.
            hgexecommand = self.get_finalized_command('build_hgexe')
            dest = os.path.join(self.build_dir, 'hg.exe')
            self.mkpath(self.build_dir)
            self.copy_file(hgexecommand.hgexepath, dest)

            # Remove hg.bat because it is redundant with hg.exe.
            self.scripts.remove('contrib/win32/hg.bat')

        return build_scripts.run(self)
347 347
class hgbuildpy(build_py):
    """build_py that selects the extension strategy and records the
    module policy (pure Python vs. C extensions) for the built package."""
    def finalize_options(self):
        build_py.finalize_options(self)

        if self.distribution.pure:
            # --pure: no C extensions at all
            self.distribution.ext_modules = []
        elif self.distribution.cffi:
            # PyPy (or explicit cffi) build: use cffi-generated extensions
            from mercurial.cffi import (
                bdiff,
                mpatch,
            )
            exts = [mpatch.ffi.distutils_extension(),
                    bdiff.ffi.distutils_extension()]
            # cffi modules go here
            if sys.platform == 'darwin':
                from mercurial.cffi import osutil
                exts.append(osutil.ffi.distutils_extension())
            self.distribution.ext_modules = exts
        else:
            # CPython C-extension build requires the Python headers
            h = os.path.join(get_python_inc(), 'Python.h')
            if not os.path.exists(h):
                raise SystemExit('Python headers are required to build '
                                 'Mercurial but weren\'t found in %s' % h)

    def run(self):
        # record which module policy the built tree should use
        if self.distribution.pure:
            modulepolicy = 'py'
        elif self.build_lib == '.':
            # in-place build should run without rebuilding C extensions
            modulepolicy = 'allow'
        else:
            modulepolicy = 'c'
        with open("mercurial/__modulepolicy__.py", "w") as f:
            f.write('# this file is autogenerated by setup.py\n')
            f.write('modulepolicy = b"%s"\n' % modulepolicy)

        build_py.run(self)
385 385
class buildhgextindex(Command):
    """Generate hgext/__index__.py listing bundled extensions, for frozen
    packages that cannot scan the filesystem at runtime."""
    description = 'generate prebuilt index of hgext (for frozen package)'
    user_options = []
    _indexfilename = 'hgext/__index__.py'

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # blank out any stale index first so it doesn't influence the scan
        if os.path.exists(self._indexfilename):
            with open(self._indexfilename, 'w') as f:
                f.write('# empty\n')

        # here no extension enabled, disabled() lists up everything
        code = ('import pprint; from mercurial import extensions; '
                'pprint.pprint(extensions.disabled())')
        out, err = runcmd([sys.executable, '-c', code], env)
        if err:
            raise DistutilsExecError(err)

        with open(self._indexfilename, 'w') as f:
            f.write('# this file is autogenerated by setup.py\n')
            f.write('docs = ')
            f.write(out)
413 413
class buildhgexe(build_ext):
    """Compile the Windows hg.exe launcher from mercurial/exewrapper.c."""
    description = 'compile hg.exe from mercurial/exewrapper.c'

    def build_extensions(self):
        # Windows-only; a no-op everywhere else
        if os.name != 'nt':
            return
        if isinstance(self.compiler, HackedMingw32CCompiler):
            self.compiler.compiler_so = self.compiler.compiler # no -mdll
            self.compiler.dll_libraries = [] # no -lmsrvc90

        # Different Python installs can have different Python library
        # names. e.g. the official CPython distribution uses pythonXY.dll
        # and MinGW uses libpythonX.Y.dll.
        _kernel32 = ctypes.windll.kernel32
        _kernel32.GetModuleFileNameA.argtypes = [ctypes.c_void_p,
                                                 ctypes.c_void_p,
                                                 ctypes.c_ulong]
        _kernel32.GetModuleFileNameA.restype = ctypes.c_ulong
        size = 1000
        buf = ctypes.create_string_buffer(size + 1)
        # ask the OS for the filename of the loaded Python DLL
        filelen = _kernel32.GetModuleFileNameA(sys.dllhandle, ctypes.byref(buf),
                                               size)

        if filelen > 0 and filelen != size:
            dllbasename = os.path.basename(buf.value)
            if not dllbasename.lower().endswith('.dll'):
                raise SystemExit('Python DLL does not end with .dll: %s' %
                                 dllbasename)
            pythonlib = dllbasename[:-4]
        else:
            # fall back to the conventional pythonXY name derived from
            # the running interpreter's version
            log.warn('could not determine Python DLL filename; '
                     'assuming pythonXY')

            hv = sys.hexversion
            pythonlib = 'python%d%d' % (hv >> 24, (hv >> 16) & 0xff)

        log.info('using %s as Python library name' % pythonlib)
        with open('mercurial/hgpythonlib.h', 'wb') as f:
            f.write('/* this file is autogenerated by setup.py */\n')
            f.write('#define HGPYTHONLIB "%s"\n' % pythonlib)
        objects = self.compiler.compile(['mercurial/exewrapper.c'],
                                        output_dir=self.build_temp)
        dir = os.path.dirname(self.get_ext_fullpath('dummy'))
        target = os.path.join(dir, 'hg')
        self.compiler.link_executable(objects, target,
                                      libraries=[],
                                      output_dir=self.build_temp)

    @property
    def hgexepath(self):
        # path of the built hg.exe inside the build tree
        dir = os.path.dirname(self.get_ext_fullpath('dummy'))
        return os.path.join(self.build_temp, dir, 'hg.exe')
466 466
class hginstalllib(install_lib):
    '''
    This is a specialization of install_lib that replaces the copy_file used
    there so that it supports setting the mode of files after copying them,
    instead of just preserving the mode that the files originally had. If your
    system has a umask of something like 027, preserving the permissions when
    copying will lead to a broken install.

    Note that just passing keep_permissions=False to copy_file would be
    insufficient, as it might still be applying a umask.
    '''

    def run(self):
        """Run install_lib with distutils' copy_file temporarily replaced
        by a permission-normalizing wrapper; always restore the original."""
        realcopyfile = file_util.copy_file
        def copyfileandsetmode(*args, **kwargs):
            # Same signature as file_util.copy_file; only post-processes
            # the destination's permissions when a copy actually happened.
            src, dst = args[0], args[1]
            dst, copied = realcopyfile(*args, **kwargs)
            if copied:
                st = os.stat(src)
                # Persist executable bit (apply it to group and other if user
                # has it)
                if st[stat.ST_MODE] & stat.S_IXUSR:
                    setmode = int('0755', 8)
                else:
                    setmode = int('0644', 8)
                # Keep any non-permission mode bits, force the rwx bits.
                m = stat.S_IMODE(st[stat.ST_MODE])
                m = (m & ~int('0777', 8)) | setmode
                os.chmod(dst, m)
        # Monkeypatch the module-level function used internally by
        # install_lib.run(); restored in the finally block below.
        file_util.copy_file = copyfileandsetmode
        try:
            install_lib.run(self)
        finally:
            file_util.copy_file = realcopyfile
500 500
class hginstallscripts(install_scripts):
    '''
    This is a specialization of install_scripts that replaces the @LIBDIR@ with
    the configured directory for modules. If possible, the path is made relative
    to the directory for scripts.
    '''

    def initialize_options(self):
        install_scripts.initialize_options(self)

        # Filled in by finalize_options() from the 'install' command.
        self.install_lib = None

    def finalize_options(self):
        install_scripts.finalize_options(self)
        # Inherit install_lib from the top-level 'install' command.
        self.set_undefined_options('install',
                                   ('install_lib', 'install_lib'))

    def run(self):
        """Install scripts, then rewrite @LIBDIR@ in each installed text
        script to point at the module install directory."""
        install_scripts.run(self)

        # It only makes sense to replace @LIBDIR@ with the install path if
        # the install path is known. For wheels, the logic below calculates
        # the libdir to be "../..". This is because the internal layout of a
        # wheel archive looks like:
        #
        #   mercurial-3.6.1.data/scripts/hg
        #   mercurial/__init__.py
        #
        # When installing wheels, the subdirectories of the "<pkg>.data"
        # directory are translated to system local paths and files therein
        # are copied in place. The mercurial/* files are installed into the
        # site-packages directory. However, the site-packages directory
        # isn't known until wheel install time. This means we have no clue
        # at wheel generation time what the installed site-packages directory
        # will be. And, wheels don't appear to provide the ability to register
        # custom code to run during wheel installation. This all means that
        # we can't reliably set the libdir in wheels: the default behavior
        # of looking in sys.path must do.

        if (os.path.splitdrive(self.install_dir)[0] !=
            os.path.splitdrive(self.install_lib)[0]):
            # can't make relative paths from one drive to another, so use an
            # absolute path instead
            libdir = self.install_lib
        else:
            # Compute a scripts-dir-relative path to the lib dir: strip the
            # common prefix, then climb one level per remaining component.
            common = os.path.commonprefix((self.install_dir, self.install_lib))
            rest = self.install_dir[len(common):]
            uplevel = len([n for n in os.path.split(rest) if n])

            libdir = uplevel * ('..' + os.sep) + self.install_lib[len(common):]

        for outfile in self.outfiles:
            with open(outfile, 'rb') as fp:
                data = fp.read()

            # skip binary files
            if b'\0' in data:
                continue

            # During local installs, the shebang will be rewritten to the final
            # install path. During wheel packaging, the shebang has a special
            # value.
            if data.startswith(b'#!python'):
                log.info('not rewriting @LIBDIR@ in %s because install path '
                         'not known' % outfile)
                continue

            data = data.replace(b'@LIBDIR@', libdir.encode(libdir_escape))
            with open(outfile, 'wb') as fp:
                fp.write(data)
571 571
# Map distutils/setuptools command names to Mercurial's customized
# command implementations defined above.
cmdclass = dict(
    build=hgbuild,
    build_mo=hgbuildmo,
    build_ext=hgbuildext,
    build_py=hgbuildpy,
    build_scripts=hgbuildscripts,
    build_hgextindex=buildhgextindex,
    install_lib=hginstalllib,
    install_scripts=hginstallscripts,
    build_hgexe=buildhgexe,
)
582 582
# Python packages included in a Mercurial installation, in the order
# handed to setup() below.
packages = [
    'mercurial',
    'mercurial.cext',
    'mercurial.hgweb',
    'mercurial.httpclient',
    'mercurial.pure',
    'hgext',
    'hgext.convert',
    'hgext.fsmonitor',
    'hgext.fsmonitor.pywatchman',
    'hgext.highlight',
    'hgext.largefiles',
    'hgext.zeroconf',
    'hgext3rd',
]
591 591
# Headers that every C extension depends on; touching one of these
# triggers a rebuild of all extensions.
common_depends = ['mercurial/' + hdr
                  for hdr in ('bitmanipulation.h', 'compat.h', 'util.h')]
common_include_dirs = ['mercurial']

# Extra compile/link flags for the osutil extension; populated by the
# platform probes below.
osutil_cflags = []
osutil_ldflags = []
599 599
# platform specific macros
# Probe the current platform/toolchain for optional libc functions; each
# hit adds a -DHAVE_<FUNC> define for the osutil extension.
for plat, func in [('bsd', 'setproctitle')]:
    if re.search(plat, sys.platform) and hasfunction(new_compiler(), func):
        osutil_cflags.append('-DHAVE_%s' % func.upper())

# Try to compile small C probe programs to detect which statfs() flavor
# (BSD vs Linux) the platform provides.
for plat, macro, code in [
    ('bsd|darwin', 'BSD_STATFS', '''
     #include <sys/param.h>
     #include <sys/mount.h>
     int main() { struct statfs s; return sizeof(s.f_fstypename); }
     '''),
    ('linux', 'LINUX_STATFS', '''
     #include <linux/magic.h>
     #include <sys/vfs.h>
     int main() { struct statfs s; return sizeof(s.f_type); }
     '''),
]:
    if re.search(plat, sys.platform) and cancompile(new_compiler(), code):
        osutil_cflags.append('-DHAVE_%s' % macro)

if sys.platform == 'darwin':
    # Link flags consumed by the mercurial.cext.osutil Extension below.
    osutil_ldflags += ['-framework', 'ApplicationServices']
622 622
# C extension modules to build. Extensions that have been migrated to
# the policy importer live under the mercurial.cext package; the rest
# still build as top-level mercurial.<name> modules.
#
# Fix: the source contained both the stale pre-rename
# 'mercurial.diffhelpers' entry and its replacement (a leftover diff
# artifact that is not even valid syntax); only the renamed
# 'mercurial.cext.diffhelpers' entry is kept.
extmodules = [
    Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c',
                                       'mercurial/cext/bdiff.c'],
              include_dirs=common_include_dirs,
              depends=common_depends + ['mercurial/bdiff.h']),
    Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    Extension('mercurial.mpatch', ['mercurial/mpatch.c',
                                   'mercurial/mpatch_module.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    Extension('mercurial.parsers', ['mercurial/dirs.c',
                                    'mercurial/manifest.c',
                                    'mercurial/parsers.c',
                                    'mercurial/pathencode.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    Extension('mercurial.cext.osutil', ['mercurial/cext/osutil.c'],
              include_dirs=common_include_dirs,
              extra_compile_args=osutil_cflags,
              extra_link_args=osutil_ldflags,
              depends=common_depends),
    Extension('hgext.fsmonitor.pywatchman.bser',
              ['hgext/fsmonitor/pywatchman/bser.c']),
]
652 652
# python-zstandard is vendored in contrib/; the path must be inserted
# before the import so setup_zstd resolves to the vendored copy, which
# then provides a ready-made extension for mercurial.zstd.
sys.path.insert(0, 'contrib/python-zstandard')
import setup_zstd
extmodules.append(setup_zstd.get_c_extension(name='mercurial.zstd'))
656 656
try:
    from distutils import cygwinccompiler

    # the -mno-cygwin option has been deprecated for years
    compiler = cygwinccompiler.Mingw32CCompiler

    class HackedMingw32CCompiler(cygwinccompiler.Mingw32CCompiler):
        # Strip the obsolete -mno-cygwin flag from every command line the
        # base class configured.
        def __init__(self, *args, **kwargs):
            compiler.__init__(self, *args, **kwargs)
            for i in 'compiler compiler_so linker_exe linker_so'.split():
                try:
                    getattr(self, i).remove('-mno-cygwin')
                except ValueError:
                    # flag not present in this command list; nothing to do
                    pass

    # Make distutils use the patched compiler class everywhere.
    cygwinccompiler.Mingw32CCompiler = HackedMingw32CCompiler
except ImportError:
    # the cygwinccompiler package is not available on some Python
    # distributions like the ones from the optware project for Synology
    # DiskStation boxes
    class HackedMingw32CCompiler(object):
        # Placeholder so isinstance() checks elsewhere keep working.
        pass
679 679
# Non-Python data files shipped inside the mercurial package
# (translations, help texts, default config snippets, dummy cert).
packagedata = {
    'mercurial': [
        'locale/*/LC_MESSAGES/hg.mo',
        'help/*.txt',
        'help/internals/*.txt',
        'default.d/*.rc',
        'dummycert.pem',
    ],
}
685 685
def ordinarypath(p):
    '''Tell whether name *p* is an ordinary file/directory entry: non-empty,
    not hidden (no leading dot) and not an editor backup (no trailing
    tilde). An empty *p* is returned as-is (falsy).'''
    return p and not p.startswith('.') and not p.endswith('~')
688 688
# Register every ordinary file under mercurial/templates as package data,
# with paths recorded relative to the mercurial package directory.
for root in ('templates',):
    for curdir, dirs, files in os.walk(os.path.join('mercurial', root)):
        # Drop the leading 'mercurial' + separator component.
        reldir = curdir.split(os.sep, 1)[1]
        # Prune hidden/backup directories in place so os.walk skips them.
        dirs[:] = [d for d in dirs if ordinarypath(d)]
        for name in files:
            if ordinarypath(name):
                packagedata['mercurial'].append(os.path.join(reldir, name))
696 696
# Files for setup()'s data_files argument; nothing to add by default.
datafiles = []

# distutils expects version to be str/unicode. Converting it to
# unicode on Python 2 still works because it won't contain any
# non-ascii bytes and will be implicitly converted back to bytes
# when operated on.
assert isinstance(version, bytes)
setupversion = version.decode('ascii')
705 705
# Extra keyword arguments forwarded to setup() below.
extra = {}

if py2exeloaded:
    # Configure the py2exe console application for hg.
    extra['console'] = [
        {'script':'hg',
         'copyright':'Copyright (C) 2005-2017 Matt Mackall and others',
         'product_version':version}]
    # sub command of 'build' because 'py2exe' does not handle sub_commands
    build.sub_commands.insert(0, ('build_hgextindex', None))
    # put dlls in sub directory so that they won't pollute PATH
    extra['zipfile'] = 'lib/library.zip'

if os.name == 'nt':
    # Windows binary file versions for exe/dll files must have the
    # form W.X.Y.Z, where W,X,Y,Z are numbers in the range 0..65535
    setupversion = version.split('+', 1)[0]
722 722
# Detect the installed Xcode version on macOS to work around known
# toolchain issues (see comments below).
if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
    version = runcmd(['/usr/bin/xcodebuild', '-version'], {})[0].splitlines()
    if version:
        version = version[0]
        if sys.version_info[0] == 3:
            # runcmd output is bytes on Python 3
            version = version.decode('utf-8')
        xcode4 = (version.startswith('Xcode') and
                  StrictVersion(version.split()[1]) >= StrictVersion('4.0'))
        xcode51 = re.match(r'^Xcode\s+5\.1', version) is not None
    else:
        # xcodebuild returns empty on OS X Lion with XCode 4.3 not
        # installed, but instead with only command-line tools. Assume
        # that only happens on >= Lion, thus no PPC support.
        xcode4 = True
        xcode51 = False

    # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
    # distutils.sysconfig
    if xcode4:
        os.environ['ARCHFLAGS'] = ''

    # XCode 5.1 changes clang such that it now fails to compile if the
    # -mno-fused-madd flag is passed, but the version of Python shipped with
    # OS X 10.9 Mavericks includes this flag. This causes problems in all
    # C extension modules, and a bug has been filed upstream at
    # http://bugs.python.org/issue21244. We also need to patch this here
    # so Mercurial can continue to compile in the meantime.
    if xcode51:
        cflags = get_config_var('CFLAGS')
        if cflags and re.search(r'-mno-fused-madd\b', cflags) is not None:
            os.environ['CFLAGS'] = (
                os.environ.get('CFLAGS', '') + ' -Qunused-arguments')
755 755
# The actual distutils/setuptools entry point, wiring together everything
# assembled above (custom commands, packages, C extensions, data files).
setup(name='mercurial',
      version=setupversion,
      author='Matt Mackall and many others',
      author_email='mercurial@mercurial-scm.org',
      url='https://mercurial-scm.org/',
      download_url='https://mercurial-scm.org/release/',
      description=('Fast scalable distributed SCM (revision control, version '
                   'control) system'),
      long_description=('Mercurial is a distributed SCM tool written in Python.'
                        ' It is used by a number of large projects that require'
                        ' fast, reliable distributed revision control, such as '
                        'Mozilla.'),
      license='GNU GPLv2 or any later version',
      classifiers=[
          'Development Status :: 6 - Mature',
          'Environment :: Console',
          'Intended Audience :: Developers',
          'Intended Audience :: System Administrators',
          'License :: OSI Approved :: GNU General Public License (GPL)',
          'Natural Language :: Danish',
          'Natural Language :: English',
          'Natural Language :: German',
          'Natural Language :: Italian',
          'Natural Language :: Japanese',
          'Natural Language :: Portuguese (Brazilian)',
          'Operating System :: Microsoft :: Windows',
          'Operating System :: OS Independent',
          'Operating System :: POSIX',
          'Programming Language :: C',
          'Programming Language :: Python',
          'Topic :: Software Development :: Version Control',
      ],
      scripts=scripts,
      packages=packages,
      ext_modules=extmodules,
      data_files=datafiles,
      package_data=packagedata,
      cmdclass=cmdclass,
      distclass=hgdist,
      # py2exe/bdist_mpkg options are ignored by other commands.
      options={'py2exe': {'packages': ['hgext', 'email']},
               'bdist_mpkg': {'zipdist': False,
                              'license': 'COPYING',
                              'readme': 'contrib/macosx/Readme.html',
                              'welcome': 'contrib/macosx/Welcome.html',
                              },
               },
      **extra)
General Comments 0
You need to be logged in to leave comments. Login now