diff --git a/contrib/hgfixes/fix_bytes.py b/contrib/hgfixes/fix_bytes.py new file mode 100644 --- /dev/null +++ b/contrib/hgfixes/fix_bytes.py @@ -0,0 +1,96 @@ +"""Fixer that changes plain strings to bytes strings.""" + +import re + +from lib2to3 import fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Name +from lib2to3.pygram import python_symbols as syms + +_re = re.compile(r'[rR]?[\'\"]') + +# XXX: Implementing a blacklist in 2to3 turned out to be more troublesome than +# blacklisting some modules inside the fixers. So, this is what I came with. + +blacklist = ['mercurial/demandimport.py', + 'mercurial/i18n.py', + ] + +def isdocstring(node): + def isclassorfunction(ancestor): + symbols = (syms.funcdef, syms.classdef) + # if the current node is a child of a function definition, a class + # definition or a file, then it is a docstring + if ancestor.type == syms.simple_stmt: + try: + while True: + if ancestor.type in symbols: + return True + ancestor = ancestor.parent + except AttributeError: + return False + return False + + def ismodule(ancestor): + # Our child is a docstring if we are a simple statement, and our + # ancestor is file_input. In other words, our child is a lone string in + # the source file. + try: + if (ancestor.type == syms.simple_stmt and + ancestor.parent.type == syms.file_input): + return True + except AttributeError: + return False + + def isdocassignment(ancestor): + # Assigning to __doc__, definitely a string + try: + while True: + if (ancestor.type == syms.expr_stmt and + Name('__doc__') in ancestor.children): + return True + ancestor = ancestor.parent + except AttributeError: + return False + + if ismodule(node.parent) or \ + isdocassignment(node.parent) or \ + isclassorfunction(node.parent): + return True + return False + +def shouldtransform(node): + specialnames = ['__main__'] + + if node.value in specialnames: + return False + + ggparent = node.parent.parent.parent + sggparent = str(ggparent) + + if 'getattr' in sggparent or \ + 'hasattr' in sggparent or \ + 'setattr' in sggparent or \ + 'encode' in sggparent or \ + 'decode' in sggparent: + return False + + return True + +class FixBytes(fixer_base.BaseFix): + + PATTERN = 'STRING' + + def transform(self, node, results): + if self.filename in blacklist: + return + if node.type == token.STRING: + if _re.match(node.value): + if isdocstring(node): + return + if not shouldtransform(node): + return + new = node.clone() + new.value = 'b' + new.value + return new +