# HG changeset patch # User Gregory Szorc # Date 2018-03-28 22:05:39 # Node ID cc5a040fe1502393439f0c0e5ae92330bef07625 # Parent 5fadc63ac99f36ef8a4800e237757cf7011e761b wireproto: syntax for encoding CBOR into frames We just vendored a library for encoding and decoding the CBOR data format. While the intent of that vendor was to support state files, CBOR is really a nice data format. It is extensible and compact. I've been feeling dirty inventing my own data formats for frame payloads. While custom formats can always beat out a generic format, there is a cost to be paid in terms of implementation, comprehension, etc. CBOR is compact enough that I'm not too worried about efficiency loss. I think the benefits of using a standardized format outweigh rolling our own formats. So I plan to make heavy use of CBOR in the wire protocol going forward. This commit introduces support for encoding CBOR data in frame payloads to our function to make a frame from a human string. We do need to employ some low-level Python code in order to evaluate a string as a Python expression. But other than that, this should hopefully be pretty straightforward. Unit tests for this function have been added. Differential Revision: https://phab.mercurial-scm.org/D2948 diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -2793,7 +2793,10 @@ def debugwireproto(ui, repo, path=None, or a flag name for stream flags or frame flags, respectively. Values are resolved to integers and then bitwise OR'd together. - ``payload`` is is evaluated as a Python byte string literal. + ``payload`` represents the raw frame payload. If it begins with + ``cbor:``, the following string is evaluated as Python code and the + resulting object is fed into a CBOR encoder. Otherwise it is interpreted + as a Python byte string literal. """ opts = pycompat.byteskwargs(opts) diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py --- a/mercurial/utils/stringutil.py +++ b/mercurial/utils/stringutil.py @@ -9,6 +9,7 @@ from __future__ import absolute_import +import __future__ import codecs import re as remod import textwrap @@ -497,3 +498,29 @@ def parsebool(s): If s is not a valid boolean, returns None. """ return _booleans.get(s.lower(), None) + +def evalpython(s): + """Evaluate a string containing a Python expression. + + THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE + LIMITED TO DEVELOPER-FACING FUNCTIONALITY. + """ + globs = { + r'__builtins__': { + r'None': None, + r'False': False, + r'True': True, + r'int': int, + r'set': set, + r'tuple': tuple, + # Don't need to expose dict and list because we can use + # literals. + }, + } + + # We can't use eval() directly because it inherits compiler + # flags from this module and we need unicode literals for Python 3 + # compatibility. + code = compile(s, r'', r'eval', + __future__.unicode_literals.compiler_flag, True) + return eval(code, globs, {}) diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py --- a/mercurial/wireprotoframing.py +++ b/mercurial/wireprotoframing.py @@ -16,6 +16,7 @@ import struct from .i18n import _ from .thirdparty import ( attr, + cbor, ) from . import ( error, @@ -156,6 +157,9 @@ def makeframe(requestid, streamid, strea def makeframefromhumanstring(s): """Create a frame from a human readable string + DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL + eval() USAGE. DO NOT USE IN CORE. + Strings have the form: @@ -169,6 +173,11 @@ def makeframefromhumanstring(s): named constant. Flags can be delimited by `|` to bitwise OR them together. + + If the payload begins with ``cbor:``, the following string will be + evaluated as Python code and the resulting object will be fed into + a CBOR encoder. Otherwise, the payload is interpreted as a Python + byte string literal. """ fields = s.split(b' ', 5) requestid, streamid, streamflags, frametype, frameflags, payload = fields @@ -196,7 +205,11 @@ def makeframefromhumanstring(s): else: finalflags |= int(flag) - payload = stringutil.unescapestr(payload) + if payload.startswith(b'cbor:'): + payload = cbor.dumps(stringutil.evalpython(payload[5:]), canonical=True) + + else: + payload = stringutil.unescapestr(payload) return makeframe(requestid=requestid, streamid=streamid, streamflags=finalstreamflags, typeid=frametype, diff --git a/tests/test-wireproto-serverreactor.py b/tests/test-wireproto-serverreactor.py --- a/tests/test-wireproto-serverreactor.py +++ b/tests/test-wireproto-serverreactor.py @@ -35,6 +35,59 @@ def sendcommandframes(reactor, stream, r framing.createcommandframes(stream, rid, cmd, args, datafh)) +class FrameHumanStringTests(unittest.TestCase): + def testbasic(self): + self.assertEqual(ffs(b'1 1 0 1 0 '), + b'\x00\x00\x00\x01\x00\x01\x00\x10') + + self.assertEqual(ffs(b'2 4 0 1 0 '), + b'\x00\x00\x00\x02\x00\x04\x00\x10') + + self.assertEqual(ffs(b'2 4 0 1 0 foo'), + b'\x03\x00\x00\x02\x00\x04\x00\x10foo') + + def testcborint(self): + self.assertEqual(ffs(b'1 1 0 1 0 cbor:15'), + b'\x01\x00\x00\x01\x00\x01\x00\x10\x0f') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:42'), + b'\x02\x00\x00\x01\x00\x01\x00\x10\x18*') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:1048576'), + b'\x05\x00\x00\x01\x00\x01\x00\x10\x1a' + b'\x00\x10\x00\x00') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:0'), + b'\x01\x00\x00\x01\x00\x01\x00\x10\x00') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:-1'), + b'\x01\x00\x00\x01\x00\x01\x00\x10 ') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:-342542'), + b'\x05\x00\x00\x01\x00\x01\x00\x10:\x00\x05:\r') + + def testcborstrings(self): + # String literals should be unicode. + self.assertEqual(ffs(b"1 1 0 1 0 cbor:'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo') + + self.assertEqual(ffs(b"1 1 0 1 0 cbor:b'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10Cfoo') + + self.assertEqual(ffs(b"1 1 0 1 0 cbor:u'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo') + + def testcborlists(self): + self.assertEqual(ffs(b"1 1 0 1 0 cbor:[None, True, False, 42, b'foo']"), + b'\n\x00\x00\x01\x00\x01\x00\x10\x85\xf6\xf5\xf4' + b'\x18*Cfoo') + + def testcbordicts(self): + self.assertEqual(ffs(b"1 1 0 1 0 " + b"cbor:{b'foo': b'val1', b'bar': b'val2'}"), + b'\x13\x00\x00\x01\x00\x01\x00\x10\xa2' + b'CbarDval2CfooDval1') + class FrameTests(unittest.TestCase): def testdataexactframesize(self): data = util.bytesio(b'x' * framing.DEFAULT_MAX_FRAME_SIZE)