# HG changeset patch # User Yuya Nishihara # Date 2018-04-08 02:55:46 # Node ID e9dea82ea1f33986e79f6f967db008d445b3a2b9 # Parent 152f1b47e0adbdc44c5bc593977d4ad365b9cc0e wireproto: convert python literal to object without using unsafe eval() Follows up cc5a040fe150. At this point, I don't think we need a real eval(). If we want to support a set literal, maybe we can vendor ast.literal_eval(), which is a relatively simple function. diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py --- a/mercurial/utils/stringutil.py +++ b/mercurial/utils/stringutil.py @@ -9,7 +9,7 @@ from __future__ import absolute_import -import __future__ +import ast import codecs import re as remod import textwrap @@ -499,28 +499,7 @@ def parsebool(s): """ return _booleans.get(s.lower(), None) -def evalpython(s): - """Evaluate a string containing a Python expression. - - THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE - LIMITED TO DEVELOPER-FACING FUNCTIONALITY. - """ - globs = { - r'__builtins__': { - r'None': None, - r'False': False, - r'True': True, - r'int': int, - r'set': set, - r'tuple': tuple, - # Don't need to expose dict and list because we can use - # literals. - }, - } - - # We can't use eval() directly because it inherits compiler - # flags from this module and we need unicode literals for Python 3 - # compatibility.
- code = compile(s, r'', r'eval', - __future__.unicode_literals.compiler_flag, True) - return eval(code, globs, {}) +def evalpythonliteral(s): + """Evaluate a string containing a Python literal expression""" + # We could backport our tokenizer hack to rewrite '' to u'' if we want + return ast.literal_eval(s) diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py --- a/mercurial/wireprotoframing.py +++ b/mercurial/wireprotoframing.py @@ -180,9 +180,6 @@ def makeframe(requestid, streamid, strea def makeframefromhumanstring(s): """Create a frame from a human readable string - DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL - eval() USAGE. DO NOT USE IN CORE. - Strings have the form: @@ -198,7 +195,7 @@ def makeframefromhumanstring(s): Flags can be delimited by `|` to bitwise OR them together. If the payload begins with ``cbor:``, the following string will be - evaluated as Python code and the resulting object will be fed into + evaluated as Python literal and the resulting object will be fed into a CBOR encoder. Otherwise, the payload is interpreted as a Python byte string literal. """ @@ -229,7 +226,8 @@ def makeframefromhumanstring(s): finalflags |= int(flag) if payload.startswith(b'cbor:'): - payload = cbor.dumps(stringutil.evalpython(payload[5:]), canonical=True) + payload = cbor.dumps(stringutil.evalpythonliteral(payload[5:]), + canonical=True) else: payload = stringutil.unescapestr(payload) diff --git a/tests/test-wireproto-serverreactor.py b/tests/test-wireproto-serverreactor.py --- a/tests/test-wireproto-serverreactor.py +++ b/tests/test-wireproto-serverreactor.py @@ -70,10 +70,6 @@ class FrameHumanStringTests(unittest.Tes b'\x05\x00\x00\x01\x00\x01\x00\x10:\x00\x05:\r') def testcborstrings(self): - # String literals should be unicode. - self.assertEqual(ffs(b"1 1 0 1 0 cbor:'foo'"), - b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo') - self.assertEqual(ffs(b"1 1 0 1 0 cbor:b'foo'"), b'\x04\x00\x00\x01\x00\x01\x00\x10Cfoo')