py3: use int instead of pycompat.long...
Gregory Szorc
r49787:176f1a0d default
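
Python 2 kept two integer types, int and the arbitrary-precision long; Python 3 unified them into a single int, so Mercurial's pycompat.long alias is now redundant and this changeset mechanically replaces its remaining uses with the builtin int. A minimal sketch of the kind of shim being retired (illustrative only, not Mercurial's actual pycompat source):

    import sys

    if sys.version_info[0] >= 3:
        long = int  # Python 3 has a single arbitrary-precision integer type
    # (on Python 2 the builtin long already existed, so no alias was needed)

    # Python 3 ints never overflow into a separate type:
    assert isinstance(10 ** 100, int)

Because the substitution is textual, it leaves a harmless duplicate behind where the old code listed both types side by side, as in the (int, int) tuples in the shallowutil.py and formatter.py hunks below; isinstance() tolerates repeated entries.
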
@@ -1,410 +1,410 b''
1 1 # monotone.py - monotone support for the convert extension
2 2 #
3 3 # Copyright 2008, 2009 Mikkel Fahnoe Jorgensen <mikkel@dvide.com> and
4 4 # others
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import os
10 10 import re
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.pycompat import open
14 14 from mercurial import (
15 15 error,
16 16 pycompat,
17 17 )
18 18 from mercurial.utils import dateutil
19 19
20 20 from . import common
21 21
22 22
23 23 class monotone_source(common.converter_source, common.commandline):
24 24 def __init__(self, ui, repotype, path=None, revs=None):
25 25 common.converter_source.__init__(self, ui, repotype, path, revs)
26 26 if revs and len(revs) > 1:
27 27 raise error.Abort(
28 28 _(
29 29 b'monotone source does not support specifying '
30 30 b'multiple revs'
31 31 )
32 32 )
33 33 common.commandline.__init__(self, ui, b'mtn')
34 34
35 35 self.ui = ui
36 36 self.path = path
37 37 self.automatestdio = False
38 38 self.revs = revs
39 39
40 40 norepo = common.NoRepo(
41 41 _(b"%s does not look like a monotone repository") % path
42 42 )
43 43 if not os.path.exists(os.path.join(path, b'_MTN')):
44 44 # Could be a monotone repository (SQLite db file)
45 45 try:
46 46 f = open(path, b'rb')
47 47 header = f.read(16)
48 48 f.close()
49 49 except IOError:
50 50 header = b''
51 51 if header != b'SQLite format 3\x00':
52 52 raise norepo
53 53
54 54 # regular expressions for parsing monotone output
55 55 space = br'\s*'
56 56 name = br'\s+"((?:\\"|[^"])*)"\s*'
57 57 value = name
58 58 revision = br'\s+\[(\w+)\]\s*'
59 59 lines = br'(?:.|\n)+'
60 60
61 61 self.dir_re = re.compile(space + b"dir" + name)
62 62 self.file_re = re.compile(
63 63 space + b"file" + name + b"content" + revision
64 64 )
65 65 self.add_file_re = re.compile(
66 66 space + b"add_file" + name + b"content" + revision
67 67 )
68 68 self.patch_re = re.compile(
69 69 space + b"patch" + name + b"from" + revision + b"to" + revision
70 70 )
71 71 self.rename_re = re.compile(space + b"rename" + name + b"to" + name)
72 72 self.delete_re = re.compile(space + b"delete" + name)
73 73 self.tag_re = re.compile(space + b"tag" + name + b"revision" + revision)
74 74 self.cert_re = re.compile(
75 75 lines + space + b"name" + name + b"value" + value
76 76 )
77 77
78 78 attr = space + b"file" + lines + space + b"attr" + space
79 79 self.attr_execute_re = re.compile(
80 80 attr + b'"mtn:execute"' + space + b'"true"'
81 81 )
82 82
83 83 # cached data
84 84 self.manifest_rev = None
85 85 self.manifest = None
86 86 self.files = None
87 87 self.dirs = None
88 88
89 89 common.checktool(b'mtn', abort=False)
90 90
91 91 def mtnrun(self, *args, **kwargs):
92 92 if self.automatestdio:
93 93 return self.mtnrunstdio(*args, **kwargs)
94 94 else:
95 95 return self.mtnrunsingle(*args, **kwargs)
96 96
97 97 def mtnrunsingle(self, *args, **kwargs):
98 98 kwargs['d'] = self.path
99 99 return self.run0(b'automate', *args, **kwargs)
100 100
101 101 def mtnrunstdio(self, *args, **kwargs):
102 102 # Prepare the command in automate stdio format
103 103 kwargs = pycompat.byteskwargs(kwargs)
104 104 command = []
105 105 for k, v in kwargs.items():
106 106 command.append(b"%d:%s" % (len(k), k))
107 107 if v:
108 108 command.append(b"%d:%s" % (len(v), v))
109 109 if command:
110 110 command.insert(0, b'o')
111 111 command.append(b'e')
112 112
113 113 command.append(b'l')
114 114 for arg in args:
115 115 command.append(b"%d:%s" % (len(arg), arg))
116 116 command.append(b'e')
117 117 command = b''.join(command)
118 118
119 119 self.ui.debug(b"mtn: sending '%s'\n" % command)
120 120 self.mtnwritefp.write(command)
121 121 self.mtnwritefp.flush()
122 122
123 123 return self.mtnstdioreadcommandoutput(command)
124 124
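
mtnrunstdio serializes one command in monotone's length-prefixed automate stdio syntax: an optional option block b'o...e', then an argument block b'l...e', where every key, value, and argument is written as b'<len>:<bytes>'. A standalone sketch of the same encoding (a hypothetical helper, not part of the extension):

    def encode_stdio_command(args, opts=None):
        # mirrors the loop above: netstring-style b'<len>:<bytes>' items,
        # options wrapped in b'o'..b'e', arguments wrapped in b'l'..b'e'
        parts = []
        if opts:
            parts.append(b'o')
            for k, v in opts.items():
                parts.append(b'%d:%s' % (len(k), k))
                if v:
                    parts.append(b'%d:%s' % (len(v), v))
            parts.append(b'e')
        parts.append(b'l')
        for arg in args:
            parts.append(b'%d:%s' % (len(arg), arg))
        parts.append(b'e')
        return b''.join(parts)

    # encode_stdio_command([b'get_file_of', b'foo'], {b'r': b'abc123'})
    # -> b'o1:r6:abc123el11:get_file_of3:fooe'
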
125 125 def mtnstdioreadpacket(self):
126 126 read = None
127 127 commandnbr = b''
128 128 while read != b':':
129 129 read = self.mtnreadfp.read(1)
130 130 if not read:
131 131 raise error.Abort(_(b'bad mtn packet - no end of commandnbr'))
132 132 commandnbr += read
133 133 commandnbr = commandnbr[:-1]
134 134
135 135 stream = self.mtnreadfp.read(1)
136 136 if stream not in b'mewptl':
137 137 raise error.Abort(
138 138 _(b'bad mtn packet - bad stream type %s') % stream
139 139 )
140 140
141 141 read = self.mtnreadfp.read(1)
142 142 if read != b':':
143 143 raise error.Abort(_(b'bad mtn packet - no divider before size'))
144 144
145 145 read = None
146 146 lengthstr = b''
147 147 while read != b':':
148 148 read = self.mtnreadfp.read(1)
149 149 if not read:
150 150 raise error.Abort(_(b'bad mtn packet - no end of packet size'))
151 151 lengthstr += read
152 152 try:
153 length = pycompat.long(lengthstr[:-1])
153 length = int(lengthstr[:-1])
154 154 except TypeError:
155 155 raise error.Abort(
156 156 _(b'bad mtn packet - bad packet size %s') % lengthstr
157 157 )
158 158
159 159 read = self.mtnreadfp.read(length)
160 160 if len(read) != length:
161 161 raise error.Abort(
162 162 _(
163 163 b"bad mtn packet - unable to read full packet "
164 164 b"read %s of %s"
165 165 )
166 166 % (len(read), length)
167 167 )
168 168
169 169 return (commandnbr, stream, length, read)
170 170
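
Each reply packet read above has the framing <commandnbr>:<stream>:<length>:<payload>, where stream is a single byte out of 'mewptl'. A self-contained sketch of the same parser against an in-memory buffer (hypothetical helper):

    import io

    def parse_packet(fp):
        # parse one b'<cmdnbr>:<stream>:<len>:<payload>' packet from fp
        def read_until_colon():
            out = b''
            while True:
                c = fp.read(1)
                if not c:
                    raise EOFError('truncated packet')
                if c == b':':
                    return out
                out += c

        commandnbr = read_until_colon()
        stream = fp.read(1)  # one of b'm', b'e', b'w', b'p', b't', b'l'
        if fp.read(1) != b':':
            raise ValueError('no divider before size')
        length = int(read_until_colon())
        payload = fp.read(length)
        return commandnbr, stream, length, payload

    # parse_packet(io.BytesIO(b'0:m:5:hello')) -> (b'0', b'm', 5, b'hello')
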
171 171 def mtnstdioreadcommandoutput(self, command):
172 172 retval = []
173 173 while True:
174 174 commandnbr, stream, length, output = self.mtnstdioreadpacket()
175 175 self.ui.debug(
176 176 b'mtn: read packet %s:%s:%d\n' % (commandnbr, stream, length)
177 177 )
178 178
179 179 if stream == b'l':
180 180 # End of command
181 181 if output != b'0':
182 182 raise error.Abort(
183 183 _(b"mtn command '%s' returned %s") % (command, output)
184 184 )
185 185 break
186 186 elif stream in b'ew':
187 187 # Error, warning output
188 188 self.ui.warn(_(b'%s error:\n') % self.command)
189 189 self.ui.warn(output)
190 190 elif stream == b'p':
191 191 # Progress messages
192 192 self.ui.debug(b'mtn: ' + output)
193 193 elif stream == b'm':
194 194 # Main stream - command output
195 195 retval.append(output)
196 196
197 197 return b''.join(retval)
198 198
199 199 def mtnloadmanifest(self, rev):
200 200 if self.manifest_rev == rev:
201 201 return
202 202 self.manifest = self.mtnrun(b"get_manifest_of", rev).split(b"\n\n")
203 203 self.manifest_rev = rev
204 204 self.files = {}
205 205 self.dirs = {}
206 206
207 207 for e in self.manifest:
208 208 m = self.file_re.match(e)
209 209 if m:
210 210 attr = b""
211 211 name = m.group(1)
212 212 node = m.group(2)
213 213 if self.attr_execute_re.match(e):
214 214 attr += b"x"
215 215 self.files[name] = (node, attr)
216 216 m = self.dir_re.match(e)
217 217 if m:
218 218 self.dirs[m.group(1)] = True
219 219
220 220 def mtnisfile(self, name, rev):
221 221 # a non-file could be a directory or a deleted or renamed file
222 222 self.mtnloadmanifest(rev)
223 223 return name in self.files
224 224
225 225 def mtnisdir(self, name, rev):
226 226 self.mtnloadmanifest(rev)
227 227 return name in self.dirs
228 228
229 229 def mtngetcerts(self, rev):
230 230 certs = {
231 231 b"author": b"<missing>",
232 232 b"date": b"<missing>",
233 233 b"changelog": b"<missing>",
234 234 b"branch": b"<missing>",
235 235 }
236 236 certlist = self.mtnrun(b"certs", rev)
237 237 # mtn < 0.45:
238 238 # key "test@selenic.com"
239 239 # mtn >= 0.45:
240 240 # key [ff58a7ffb771907c4ff68995eada1c4da068d328]
241 241 certlist = re.split(br'\n\n {6}key ["\[]', certlist)
242 242 for e in certlist:
243 243 m = self.cert_re.match(e)
244 244 if m:
245 245 name, value = m.groups()
246 246 value = value.replace(br'\"', b'"')
247 247 value = value.replace(br'\\', b'\\')
248 248 certs[name] = value
249 249 # Monotone may have subsecond dates: 2005-02-05T09:39:12.364306
250 250 # and all times are stored in UTC
251 251 certs[b"date"] = certs[b"date"].split(b'.')[0] + b" UTC"
252 252 return certs
253 253
254 254 # implement the converter_source interface:
255 255
256 256 def getheads(self):
257 257 if not self.revs:
258 258 return self.mtnrun(b"leaves").splitlines()
259 259 else:
260 260 return self.revs
261 261
262 262 def getchanges(self, rev, full):
263 263 if full:
264 264 raise error.Abort(
265 265 _(b"convert from monotone does not support --full")
266 266 )
267 267 revision = self.mtnrun(b"get_revision", rev).split(b"\n\n")
268 268 files = {}
269 269 ignoremove = {}
270 270 renameddirs = []
271 271 copies = {}
272 272 for e in revision:
273 273 m = self.add_file_re.match(e)
274 274 if m:
275 275 files[m.group(1)] = rev
276 276 ignoremove[m.group(1)] = rev
277 277 m = self.patch_re.match(e)
278 278 if m:
279 279 files[m.group(1)] = rev
280 280 # Delete/rename is handled later when the convert engine
281 281 # discovers an IOError exception from getfile,
282 282 # but only if we add the "from" file to the list of changes.
283 283 m = self.delete_re.match(e)
284 284 if m:
285 285 files[m.group(1)] = rev
286 286 m = self.rename_re.match(e)
287 287 if m:
288 288 toname = m.group(2)
289 289 fromname = m.group(1)
290 290 if self.mtnisfile(toname, rev):
291 291 ignoremove[toname] = 1
292 292 copies[toname] = fromname
293 293 files[toname] = rev
294 294 files[fromname] = rev
295 295 elif self.mtnisdir(toname, rev):
296 296 renameddirs.append((fromname, toname))
297 297
298 298 # Directory renames can be handled only once we have recorded
299 299 # all new files
300 300 for fromdir, todir in renameddirs:
301 301 renamed = {}
302 302 for tofile in self.files:
303 303 if tofile in ignoremove:
304 304 continue
305 305 if tofile.startswith(todir + b'/'):
306 306 renamed[tofile] = fromdir + tofile[len(todir) :]
307 307 # Avoid chained moves like:
308 308 # d1(/a) => d3/d1(/a)
309 309 # d2 => d3
310 310 ignoremove[tofile] = 1
311 311 for tofile, fromfile in renamed.items():
312 312 self.ui.debug(
313 313 b"copying file in renamed directory from '%s' to '%s'"
314 314 % (fromfile, tofile),
315 315 b'\n',
316 316 )
317 317 files[tofile] = rev
318 318 copies[tofile] = fromfile
319 319 for fromfile in renamed.values():
320 320 files[fromfile] = rev
321 321
322 322 return (files.items(), copies, set())
323 323
324 324 def getfile(self, name, rev):
325 325 if not self.mtnisfile(name, rev):
326 326 return None, None
327 327 try:
328 328 data = self.mtnrun(b"get_file_of", name, r=rev)
329 329 except Exception:
330 330 return None, None
331 331 self.mtnloadmanifest(rev)
332 332 node, attr = self.files.get(name, (None, b""))
333 333 return data, attr
334 334
335 335 def getcommit(self, rev):
336 336 extra = {}
337 337 certs = self.mtngetcerts(rev)
338 338 if certs.get(b'suspend') == certs[b"branch"]:
339 339 extra[b'close'] = b'1'
340 340 dateformat = b"%Y-%m-%dT%H:%M:%S"
341 341 return common.commit(
342 342 author=certs[b"author"],
343 343 date=dateutil.datestr(dateutil.strdate(certs[b"date"], dateformat)),
344 344 desc=certs[b"changelog"],
345 345 rev=rev,
346 346 parents=self.mtnrun(b"parents", rev).splitlines(),
347 347 branch=certs[b"branch"],
348 348 extra=extra,
349 349 )
350 350
351 351 def gettags(self):
352 352 tags = {}
353 353 for e in self.mtnrun(b"tags").split(b"\n\n"):
354 354 m = self.tag_re.match(e)
355 355 if m:
356 356 tags[m.group(1)] = m.group(2)
357 357 return tags
358 358
359 359 def getchangedfiles(self, rev, i):
360 360 # This function is only needed to support --filemap
361 361 # ... and we don't support that
362 362 raise NotImplementedError
363 363
364 364 def before(self):
365 365 # Check if we have a new enough version to use automate stdio
366 366 try:
367 367 versionstr = self.mtnrunsingle(b"interface_version")
368 368 version = float(versionstr)
369 369 except Exception:
370 370 raise error.Abort(
371 371 _(b"unable to determine mtn automate interface version")
372 372 )
373 373
374 374 if version >= 12.0:
375 375 self.automatestdio = True
376 376 self.ui.debug(
377 377 b"mtn automate version %f - using automate stdio\n" % version
378 378 )
379 379
380 380 # launch the long-running automate stdio process
381 381 self.mtnwritefp, self.mtnreadfp = self._run2(
382 382 b'automate', b'stdio', b'-d', self.path
383 383 )
384 384 # read the headers
385 385 read = self.mtnreadfp.readline()
386 386 if read != b'format-version: 2\n':
387 387 raise error.Abort(
388 388 _(b'mtn automate stdio header unexpected: %s') % read
389 389 )
390 390 while read != b'\n':
391 391 read = self.mtnreadfp.readline()
392 392 if not read:
393 393 raise error.Abort(
394 394 _(
395 395 b"failed to reach end of mtn automate "
396 396 b"stdio headers"
397 397 )
398 398 )
399 399 else:
400 400 self.ui.debug(
401 401 b"mtn automate version %s - not using automate stdio "
402 402 b"(automate >= 12.0 - mtn >= 0.46 is needed)\n" % version
403 403 )
404 404
405 405 def after(self):
406 406 if self.automatestdio:
407 407 self.mtnwritefp.close()
408 408 self.mtnwritefp = None
409 409 self.mtnreadfp.close()
410 410 self.mtnreadfp = None
@@ -1,543 +1,543 b''
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import errno
10 10 import os
11 11 import stat
12 12 import struct
13 13 import tempfile
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial.pycompat import open
17 17 from mercurial.node import hex
18 18 from mercurial import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24 from mercurial.utils import (
25 25 hashutil,
26 26 storageutil,
27 27 stringutil,
28 28 )
29 29 from . import constants
30 30
31 31 if not pycompat.iswindows:
32 32 import grp
33 33
34 34
35 35 def isenabled(repo):
36 36 """returns whether the repository is remotefilelog enabled or not"""
37 37 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
38 38
39 39
40 40 def getcachekey(reponame, file, id):
41 41 pathhash = hex(hashutil.sha1(file).digest())
42 42 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
43 43
44 44
45 45 def getlocalkey(file, id):
46 46 pathhash = hex(hashutil.sha1(file).digest())
47 47 return os.path.join(pathhash, id)
48 48
49 49
50 50 def getcachepath(ui, allowempty=False):
51 51 cachepath = ui.config(b"remotefilelog", b"cachepath")
52 52 if not cachepath:
53 53 if allowempty:
54 54 return None
55 55 else:
56 56 raise error.Abort(
57 57 _(b"could not find config option remotefilelog.cachepath")
58 58 )
59 59 return util.expandpath(cachepath)
60 60
61 61
62 62 def getcachepackpath(repo, category):
63 63 cachepath = getcachepath(repo.ui)
64 64 if category != constants.FILEPACK_CATEGORY:
65 65 return os.path.join(cachepath, repo.name, b'packs', category)
66 66 else:
67 67 return os.path.join(cachepath, repo.name, b'packs')
68 68
69 69
70 70 def getlocalpackpath(base, category):
71 71 return os.path.join(base, b'packs', category)
72 72
73 73
74 74 def createrevlogtext(text, copyfrom=None, copyrev=None):
75 75 """returns a string that matches the revlog contents in a
76 76 traditional revlog
77 77 """
78 78 meta = {}
79 79 if copyfrom or text.startswith(b'\1\n'):
80 80 if copyfrom:
81 81 meta[b'copy'] = copyfrom
82 82 meta[b'copyrev'] = copyrev
83 83 text = storageutil.packmeta(meta, text)
84 84
85 85 return text
86 86
87 87
88 88 def parsemeta(text):
89 89 """parse mercurial filelog metadata"""
90 90 meta, size = storageutil.parsemeta(text)
91 91 if text.startswith(b'\1\n'):
92 92 s = text.index(b'\1\n', 2)
93 93 text = text[s + 2 :]
94 94 return meta or {}, text
95 95
96 96
97 97 def sumdicts(*dicts):
98 98 """Adds all the values of *dicts together into one dictionary. This assumes
99 99 the values in *dicts are all summable.
100 100
101 101 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
102 102 """
103 103 result = collections.defaultdict(lambda: 0)
104 104 for dict in dicts:
105 105 for k, v in dict.items():
106 106 result[k] += v
107 107 return result
108 108
109 109
110 110 def prefixkeys(dict, prefix):
111 111 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
112 112 result = {}
113 113 for k, v in dict.items():
114 114 result[prefix + k] = v
115 115 return result
116 116
117 117
118 118 def reportpackmetrics(ui, prefix, *stores):
119 119 dicts = [s.getmetrics() for s in stores]
120 120 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
121 121 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
122 122
123 123
124 124 def _parsepackmeta(metabuf):
125 125 """parse datapack meta, bytes (<metadata-list>) -> dict
126 126
127 127 The dict contains raw content - both keys and values are strings.
128 128 Upper-level business may want to convert some of them to other types like
129 129 integers, on their own.
130 130
131 131 raise ValueError if the data is corrupted
132 132 """
133 133 metadict = {}
134 134 offset = 0
135 135 buflen = len(metabuf)
136 136 while buflen - offset >= 3:
137 137 key = metabuf[offset : offset + 1]
138 138 offset += 1
139 139 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
140 140 offset += 2
141 141 if offset + metalen > buflen:
142 142 raise ValueError(b'corrupted metadata: incomplete buffer')
143 143 value = metabuf[offset : offset + metalen]
144 144 metadict[key] = value
145 145 offset += metalen
146 146 if offset != buflen:
147 147 raise ValueError(b'corrupted metadata: redundant data')
148 148 return metadict
149 149
150 150
151 151 def _buildpackmeta(metadict):
152 152 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
153 153
154 154 The dict contains raw content - both keys and values are strings.
155 155 Upper-level business may want to serialize some of other types (like
156 156 integers) to strings before calling this function.
157 157
158 158 raise ProgrammingError when metadata key is illegal, or ValueError if
159 159 length limit is exceeded
160 160 """
161 161 metabuf = b''
162 162 for k, v in sorted((metadict or {}).items()):
163 163 if len(k) != 1:
164 164 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
165 165 if len(v) > 0xFFFE:
166 166 raise ValueError(
167 167 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
168 168 )
169 169 metabuf += k
170 170 metabuf += struct.pack(b'!H', len(v))
171 171 metabuf += v
172 172 # len(metabuf) is guaranteed representable in 4 bytes, because there are
173 173 # only 256 keys, and for each value, len(value) <= 0xfffe.
174 174 return metabuf
175 175
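
The meta buffer is a flat sequence of records: a 1-byte key, a big-endian 16-bit length ('!H'), then the raw value, which is why keys must be exactly one byte and values at most 0xfffe bytes long. A quick round-trip sketch (illustrative helper):

    import struct

    def _pack_one(key, value):
        # one record, as _buildpackmeta emits it
        assert len(key) == 1 and len(value) <= 0xFFFE
        return key + struct.pack(b'!H', len(value)) + value

    buf = _pack_one(b's', b'1234') + _pack_one(b'f', b'\x01')
    # _parsepackmeta(buf) == {b's': b'1234', b'f': b'\x01'}
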
176 176
177 177 _metaitemtypes = {
178 constants.METAKEYFLAG: (int, pycompat.long),
179 constants.METAKEYSIZE: (int, pycompat.long),
178 constants.METAKEYFLAG: (int, int),
179 constants.METAKEYSIZE: (int, int),
180 180 }
181 181
182 182
183 183 def buildpackmeta(metadict):
184 184 """like _buildpackmeta, but typechecks metadict and normalize it.
185 185
186 186 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
187 187 and METAKEYFLAG will be dropped if its value is 0.
188 188 """
189 189 newmeta = {}
190 190 for k, v in (metadict or {}).items():
191 191 expectedtype = _metaitemtypes.get(k, (bytes,))
192 192 if not isinstance(v, expectedtype):
193 193 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
194 194 # normalize int to binary buffer
195 195 if int in expectedtype:
196 196 # optimization: remove flag if it's 0 to save space
197 197 if k == constants.METAKEYFLAG and v == 0:
198 198 continue
199 199 v = int2bin(v)
200 200 newmeta[k] = v
201 201 return _buildpackmeta(newmeta)
202 202
203 203
204 204 def parsepackmeta(metabuf):
205 205 """like _parsepackmeta, but convert fields to desired types automatically.
206 206
207 207 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
208 208 integers.
209 209 """
210 210 metadict = _parsepackmeta(metabuf)
211 211 for k, v in metadict.items():
212 212 if k in _metaitemtypes and int in _metaitemtypes[k]:
213 213 metadict[k] = bin2int(v)
214 214 return metadict
215 215
216 216
217 217 def int2bin(n):
218 218 """convert a non-negative integer to raw binary buffer"""
219 219 buf = bytearray()
220 220 while n > 0:
221 221 buf.insert(0, n & 0xFF)
222 222 n >>= 8
223 223 return bytes(buf)
224 224
225 225
226 226 def bin2int(buf):
227 227 """the reverse of int2bin, convert a binary buffer to an integer"""
228 228 x = 0
229 229 for b in bytearray(buf):
230 230 x <<= 8
231 231 x |= b
232 232 return x
233 233
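
Together these two helpers form a minimal big-endian, variable-length integer codec; note the edge case that 0 encodes to the empty buffer. For instance:

    # int2bin(0)       == b''             (zero emits no bytes)
    # int2bin(255)     == b'\xff'
    # int2bin(0x1234)  == b'\x12\x34'
    # bin2int(b'\x12\x34') == 4660 == 0x1234
    # and bin2int(int2bin(n)) == n for any n >= 0
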
234 234
235 235 class BadRemotefilelogHeader(error.StorageError):
236 236 """Exception raised when parsing a remotefilelog blob header fails."""
237 237
238 238
239 239 def parsesizeflags(raw):
240 240 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
241 241
242 242 see remotefilelogserver.createfileblob for the format.
243 243 raise BadRemotefilelogHeader if the content is ill-formed.
244 244 """
245 245 flags = revlog.REVIDX_DEFAULT_FLAGS
246 246 size = None
247 247 try:
248 248 index = raw.index(b'\0')
249 249 except ValueError:
250 250 raise BadRemotefilelogHeader(
251 251 "unexpected remotefilelog header: illegal format"
252 252 )
253 253 header = raw[:index]
254 254 if header.startswith(b'v'):
255 255 # v1 and above, header starts with 'v'
256 256 if header.startswith(b'v1\n'):
257 257 for s in header.split(b'\n'):
258 258 if s.startswith(constants.METAKEYSIZE):
259 259 size = int(s[len(constants.METAKEYSIZE) :])
260 260 elif s.startswith(constants.METAKEYFLAG):
261 261 flags = int(s[len(constants.METAKEYFLAG) :])
262 262 else:
263 263 raise BadRemotefilelogHeader(
264 264 b'unsupported remotefilelog header: %s' % header
265 265 )
266 266 else:
267 267 # v0, str(int(size)) is the header
268 268 size = int(header)
269 269 if size is None:
270 270 raise BadRemotefilelogHeader(
271 271 "unexpected remotefilelog header: no size found"
272 272 )
273 273 return index + 1, size, flags
274 274
275 275
276 276 def buildfileblobheader(size, flags, version=None):
277 277 """return the header of a remotefilelog blob.
278 278
279 279 see remotefilelogserver.createfileblob for the format.
280 280 approximately the reverse of parsesizeflags.
281 281
282 282 version could be 0 or 1, or None (auto decide).
283 283 """
284 284 # choose v0 if flags is empty, otherwise v1
285 285 if version is None:
286 286 version = int(bool(flags))
287 287 if version == 1:
288 288 header = b'v1\n%s%d\n%s%d' % (
289 289 constants.METAKEYSIZE,
290 290 size,
291 291 constants.METAKEYFLAG,
292 292 flags,
293 293 )
294 294 elif version == 0:
295 295 if flags:
296 296 raise error.ProgrammingError(b'fileblob v0 does not support flag')
297 297 header = b'%d' % size
298 298 else:
299 299 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
300 300 return header
301 301
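
Concretely, a v0 header is just the decimal size, while v1 adds a version line plus size and flag fields; parsesizeflags then scans up to the first NUL byte. Assuming remotefilelog's METAKEYSIZE/METAKEYFLAG constants are the single bytes b's'/b'f' (an assumption; check constants.py), the round trip looks like:

    # buildfileblobheader(1234, 0)             -> b'1234'            (v0)
    # buildfileblobheader(1234, 0, version=1)  -> b'v1\ns1234\nf0'   (v1)
    # parsesizeflags(b'1234\x00raw text...')   -> (5, 1234, default flags)
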
302 302
303 303 def ancestormap(raw):
304 304 offset, size, flags = parsesizeflags(raw)
305 305 start = offset + size
306 306
307 307 mapping = {}
308 308 while start < len(raw):
309 309 divider = raw.index(b'\0', start + 80)
310 310
311 311 currentnode = raw[start : (start + 20)]
312 312 p1 = raw[(start + 20) : (start + 40)]
313 313 p2 = raw[(start + 40) : (start + 60)]
314 314 linknode = raw[(start + 60) : (start + 80)]
315 315 copyfrom = raw[(start + 80) : divider]
316 316
317 317 mapping[currentnode] = (p1, p2, linknode, copyfrom)
318 318 start = divider + 1
319 319
320 320 return mapping
321 321
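
After the header and raw text, the blob carries fixed-width ancestry records: four 20-byte binary hashes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path, which is exactly the layout ancestormap walks. A sketch of producing one record (hypothetical values):

    def _ancestor_record(node, p1, p2, linknode, copyfrom=b''):
        # 4 x 20-byte hashes, then the copyfrom path ending at a NUL
        assert all(len(h) == 20 for h in (node, p1, p2, linknode))
        return node + p1 + p2 + linknode + copyfrom + b'\0'

    # ancestormap() slices each record back apart at offsets
    # 0:20, 20:40, 40:60, 60:80, and 80:<NUL>.
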
322 322
323 323 def readfile(path):
324 324 f = open(path, b'rb')
325 325 try:
326 326 result = f.read()
327 327
328 328 # we should never have empty files
329 329 if not result:
330 330 os.remove(path)
331 331 raise IOError(b"empty file: %s" % path)
332 332
333 333 return result
334 334 finally:
335 335 f.close()
336 336
337 337
338 338 def unlinkfile(filepath):
339 339 if pycompat.iswindows:
340 340 # On Windows, os.unlink cannot delete readonly files
341 341 os.chmod(filepath, stat.S_IWUSR)
342 342 os.unlink(filepath)
343 343
344 344
345 345 def renamefile(source, destination):
346 346 if pycompat.iswindows:
347 347 # On Windows, os.rename cannot rename readonly files
348 348 # and cannot overwrite destination if it exists
349 349 os.chmod(source, stat.S_IWUSR)
350 350 if os.path.isfile(destination):
351 351 os.chmod(destination, stat.S_IWUSR)
352 352 os.unlink(destination)
353 353
354 354 os.rename(source, destination)
355 355
356 356
357 357 def writefile(path, content, readonly=False):
358 358 dirname, filename = os.path.split(path)
359 359 if not os.path.exists(dirname):
360 360 try:
361 361 os.makedirs(dirname)
362 362 except OSError as ex:
363 363 if ex.errno != errno.EEXIST:
364 364 raise
365 365
366 366 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
367 367 os.close(fd)
368 368
369 369 try:
370 370 f = util.posixfile(temp, b'wb')
371 371 f.write(content)
372 372 f.close()
373 373
374 374 if readonly:
375 375 mode = 0o444
376 376 else:
377 377 # tempfiles are created with 0o600, so we need to manually set the
378 378 # mode.
379 379 oldumask = os.umask(0)
380 380 # there's no way to get the umask without modifying it, so set it
381 381 # back
382 382 os.umask(oldumask)
383 383 mode = ~oldumask
384 384
385 385 renamefile(temp, path)
386 386 os.chmod(path, mode)
387 387 except Exception:
388 388 try:
389 389 unlinkfile(temp)
390 390 except OSError:
391 391 pass
392 392 raise
393 393
394 394
395 395 def sortnodes(nodes, parentfunc):
396 396 """Topologically sorts the nodes, using the parentfunc to find
397 397 the parents of nodes."""
398 398 nodes = set(nodes)
399 399 childmap = {}
400 400 parentmap = {}
401 401 roots = []
402 402
403 403 # Build a child and parent map
404 404 for n in nodes:
405 405 parents = [p for p in parentfunc(n) if p in nodes]
406 406 parentmap[n] = set(parents)
407 407 for p in parents:
408 408 childmap.setdefault(p, set()).add(n)
409 409 if not parents:
410 410 roots.append(n)
411 411
412 412 roots.sort()
413 413 # Process roots, adding children to the queue as they become roots
414 414 results = []
415 415 while roots:
416 416 n = roots.pop(0)
417 417 results.append(n)
418 418 if n in childmap:
419 419 children = childmap[n]
420 420 for c in children:
421 421 childparents = parentmap[c]
422 422 childparents.remove(n)
423 423 if len(childparents) == 0:
424 424 # insert at the beginning, that way child nodes
425 425 # are likely to be output immediately after their
426 426 # parents. This gives better compression results.
427 427 roots.insert(0, c)
428 428
429 429 return results
430 430
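
sortnodes is Kahn's topological sort with one deliberate twist: a child whose last parent was just emitted is pushed to the front of the queue, keeping parents and children adjacent in the output for better pack compression. A small usage sketch (hypothetical graph):

    parents = {b'a': [], b'b': [b'a'], b'c': [b'a'], b'd': [b'b']}
    order = sortnodes(parents, parents.__getitem__)
    # one valid order: [b'a', b'b', b'd', b'c'] -- every parent
    # precedes all of its children
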
431 431
432 432 def readexactly(stream, n):
433 433 '''read n bytes from stream.read and abort if fewer were available'''
434 434 s = stream.read(n)
435 435 if len(s) < n:
436 436 raise error.Abort(
437 437 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
438 438 % (len(s), n)
439 439 )
440 440 return s
441 441
442 442
443 443 def readunpack(stream, fmt):
444 444 data = readexactly(stream, struct.calcsize(fmt))
445 445 return struct.unpack(fmt, data)
446 446
447 447
448 448 def readpath(stream):
449 449 rawlen = readexactly(stream, constants.FILENAMESIZE)
450 450 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
451 451 return readexactly(stream, pathlen)
452 452
453 453
454 454 def readnodelist(stream):
455 455 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
456 456 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
457 457 for i in pycompat.xrange(nodecount):
458 458 yield readexactly(stream, constants.NODESIZE)
459 459
460 460
461 461 def readpathlist(stream):
462 462 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
463 463 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
464 464 for i in pycompat.xrange(pathcount):
465 465 yield readpath(stream)
466 466
467 467
468 468 def getgid(groupname):
469 469 try:
470 470 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
471 471 return gid
472 472 except KeyError:
473 473 return None
474 474
475 475
476 476 def setstickygroupdir(path, gid, warn=None):
477 477 if gid is None:
478 478 return
479 479 try:
480 480 os.chown(path, -1, gid)
481 481 os.chmod(path, 0o2775)
482 482 except (IOError, OSError) as ex:
483 483 if warn:
484 484 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
485 485
486 486
487 487 def mkstickygroupdir(ui, path):
488 488 """Creates the given directory (if it doesn't exist) and give it a
489 489 particular group with setgid enabled."""
490 490 gid = None
491 491 groupname = ui.config(b"remotefilelog", b"cachegroup")
492 492 if groupname:
493 493 gid = getgid(groupname)
494 494 if gid is None:
495 495 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
496 496
497 497 # we use a single stat syscall to test the existence and mode / group bit
498 498 st = None
499 499 try:
500 500 st = os.stat(path)
501 501 except OSError:
502 502 pass
503 503
504 504 if st:
505 505 # exists
506 506 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
507 507 # permission needs to be fixed
508 508 setstickygroupdir(path, gid, ui.warn)
509 509 return
510 510
511 511 oldumask = os.umask(0o002)
512 512 try:
513 513 missingdirs = [path]
514 514 path = os.path.dirname(path)
515 515 while path and not os.path.exists(path):
516 516 missingdirs.append(path)
517 517 path = os.path.dirname(path)
518 518
519 519 for path in reversed(missingdirs):
520 520 try:
521 521 os.mkdir(path)
522 522 except OSError as ex:
523 523 if ex.errno != errno.EEXIST:
524 524 raise
525 525
526 526 for path in missingdirs:
527 527 setstickygroupdir(path, gid, ui.warn)
528 528 finally:
529 529 os.umask(oldumask)
530 530
531 531
532 532 def getusername(ui):
533 533 try:
534 534 return stringutil.shortuser(ui.username())
535 535 except Exception:
536 536 return b'unknown'
537 537
538 538
539 539 def getreponame(ui):
540 540 reponame = ui.config(b'paths', b'default')
541 541 if reponame:
542 542 return os.path.basename(reponame)
543 543 return b"unknown"
@@ -1,870 +1,870 b''
1 1 # formatter.py - generic output formatting for mercurial
2 2 #
3 3 # Copyright 2012 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Generic output formatting for Mercurial
9 9
10 10 The formatter provides API to show data in various ways. The following
11 11 functions should be used in place of ui.write():
12 12
13 13 - fm.write() for unconditional output
14 14 - fm.condwrite() to show some extra data conditionally in plain output
15 15 - fm.context() to provide changectx to template output
16 16 - fm.data() to provide extra data to JSON or template output
17 17 - fm.plain() to show raw text that isn't provided to JSON or template output
18 18
19 19 To show structured data (e.g. date tuples, dicts, lists), apply fm.format*()
20 20 beforehand so the data is converted to the appropriate data type. Use
21 21 fm.isplain() if you need to convert or format data conditionally which isn't
22 22 supported by the formatter API.
23 23
24 24 To build nested structure (i.e. a list of dicts), use fm.nested().
25 25
26 26 See also https://www.mercurial-scm.org/wiki/GenericTemplatingPlan
27 27
28 28 fm.condwrite() vs 'if cond:':
29 29
30 30 In most cases, use fm.condwrite() so users can selectively show the data
31 31 in template output. If it's costly to build data, use plain 'if cond:' with
32 32 fm.write().
33 33
34 34 fm.nested() vs fm.formatdict() (or fm.formatlist()):
35 35
36 36 fm.nested() should be used to form a tree structure (a list of dicts of
37 37 lists of dicts...) which can be accessed through template keywords, e.g.
38 38 "{foo % "{bar % {...}} {baz % {...}}"}". On the other hand, fm.formatdict()
39 39 exports a dict-type object to template, which can be accessed by e.g.
40 40 "{get(foo, key)}" function.
41 41
42 42 Doctest helper:
43 43
44 44 >>> def show(fn, verbose=False, **opts):
45 45 ... import sys
46 46 ... from . import ui as uimod
47 47 ... ui = uimod.ui()
48 48 ... ui.verbose = verbose
49 49 ... ui.pushbuffer()
50 50 ... try:
51 51 ... return fn(ui, ui.formatter(pycompat.sysbytes(fn.__name__),
52 52 ... pycompat.byteskwargs(opts)))
53 53 ... finally:
54 54 ... print(pycompat.sysstr(ui.popbuffer()), end='')
55 55
56 56 Basic example:
57 57
58 58 >>> def files(ui, fm):
59 59 ... files = [(b'foo', 123, (0, 0)), (b'bar', 456, (1, 0))]
60 60 ... for f in files:
61 61 ... fm.startitem()
62 62 ... fm.write(b'path', b'%s', f[0])
63 63 ... fm.condwrite(ui.verbose, b'date', b' %s',
64 64 ... fm.formatdate(f[2], b'%Y-%m-%d %H:%M:%S'))
65 65 ... fm.data(size=f[1])
66 66 ... fm.plain(b'\\n')
67 67 ... fm.end()
68 68 >>> show(files)
69 69 foo
70 70 bar
71 71 >>> show(files, verbose=True)
72 72 foo 1970-01-01 00:00:00
73 73 bar 1970-01-01 00:00:01
74 74 >>> show(files, template=b'json')
75 75 [
76 76 {
77 77 "date": [0, 0],
78 78 "path": "foo",
79 79 "size": 123
80 80 },
81 81 {
82 82 "date": [1, 0],
83 83 "path": "bar",
84 84 "size": 456
85 85 }
86 86 ]
87 87 >>> show(files, template=b'path: {path}\\ndate: {date|rfc3339date}\\n')
88 88 path: foo
89 89 date: 1970-01-01T00:00:00+00:00
90 90 path: bar
91 91 date: 1970-01-01T00:00:01+00:00
92 92
93 93 Nested example:
94 94
95 95 >>> def subrepos(ui, fm):
96 96 ... fm.startitem()
97 97 ... fm.write(b'reponame', b'[%s]\\n', b'baz')
98 98 ... files(ui, fm.nested(b'files', tmpl=b'{reponame}'))
99 99 ... fm.end()
100 100 >>> show(subrepos)
101 101 [baz]
102 102 foo
103 103 bar
104 104 >>> show(subrepos, template=b'{reponame}: {join(files % "{path}", ", ")}\\n')
105 105 baz: foo, bar
106 106 """
107 107
108 108
109 109 import contextlib
110 110 import itertools
111 111 import os
112 112 import pickle
113 113
114 114 from .i18n import _
115 115 from .node import (
116 116 hex,
117 117 short,
118 118 )
119 119 from .thirdparty import attr
120 120
121 121 from . import (
122 122 error,
123 123 pycompat,
124 124 templatefilters,
125 125 templatekw,
126 126 templater,
127 127 templateutil,
128 128 util,
129 129 )
130 130 from .utils import (
131 131 cborutil,
132 132 dateutil,
133 133 stringutil,
134 134 )
135 135
136 136
137 137 def isprintable(obj):
138 138 """Check if the given object can be directly passed in to formatter's
139 139 write() and data() functions
140 140
141 141 Returns False if the object is unsupported or must be pre-processed by
142 142 formatdate(), formatdict(), or formatlist().
143 143 """
144 return isinstance(obj, (type(None), bool, int, pycompat.long, float, bytes))
144 return isinstance(obj, (type(None), bool, int, int, float, bytes))
145 145
146 146
147 147 class _nullconverter(object):
148 148 '''convert non-primitive data types to be processed by formatter'''
149 149
150 150 # set to True if context object should be stored as item
151 151 storecontext = False
152 152
153 153 @staticmethod
154 154 def wrapnested(data, tmpl, sep):
155 155 '''wrap nested data by appropriate type'''
156 156 return data
157 157
158 158 @staticmethod
159 159 def formatdate(date, fmt):
160 160 '''convert date tuple to appropriate format'''
161 161 # timestamp can be float, but the canonical form should be int
162 162 ts, tz = date
163 163 return (int(ts), tz)
164 164
165 165 @staticmethod
166 166 def formatdict(data, key, value, fmt, sep):
167 167 '''convert dict or key-value pairs to appropriate dict format'''
168 168 # use plain dict instead of util.sortdict so that data can be
169 169 # serialized as a builtin dict in pickle output
170 170 return dict(data)
171 171
172 172 @staticmethod
173 173 def formatlist(data, name, fmt, sep):
174 174 '''convert iterable to appropriate list format'''
175 175 return list(data)
176 176
177 177
178 178 class baseformatter(object):
179 179
180 180 # set to True if the formatter outputs a strict format that does not support
181 181 # arbitrary output in the stream.
182 182 strict_format = False
183 183
184 184 def __init__(self, ui, topic, opts, converter):
185 185 self._ui = ui
186 186 self._topic = topic
187 187 self._opts = opts
188 188 self._converter = converter
189 189 self._item = None
190 190 # function to convert node to string suitable for this output
191 191 self.hexfunc = hex
192 192
193 193 def __enter__(self):
194 194 return self
195 195
196 196 def __exit__(self, exctype, excvalue, traceback):
197 197 if exctype is None:
198 198 self.end()
199 199
200 200 def _showitem(self):
201 201 '''show a formatted item once all data is collected'''
202 202
203 203 def startitem(self):
204 204 '''begin an item in the format list'''
205 205 if self._item is not None:
206 206 self._showitem()
207 207 self._item = {}
208 208
209 209 def formatdate(self, date, fmt=b'%a %b %d %H:%M:%S %Y %1%2'):
210 210 '''convert date tuple to appropriate format'''
211 211 return self._converter.formatdate(date, fmt)
212 212
213 213 def formatdict(self, data, key=b'key', value=b'value', fmt=None, sep=b' '):
214 214 '''convert dict or key-value pairs to appropriate dict format'''
215 215 return self._converter.formatdict(data, key, value, fmt, sep)
216 216
217 217 def formatlist(self, data, name, fmt=None, sep=b' '):
218 218 '''convert iterable to appropriate list format'''
219 219 # name is a mandatory argument for now, but it could be optional if
220 220 # we have default template keyword, e.g. {item}
221 221 return self._converter.formatlist(data, name, fmt, sep)
222 222
223 223 def context(self, **ctxs):
224 224 '''insert context objects to be used to render template keywords'''
225 225 ctxs = pycompat.byteskwargs(ctxs)
226 226 assert all(k in {b'repo', b'ctx', b'fctx'} for k in ctxs)
227 227 if self._converter.storecontext:
228 228 # populate missing resources in fctx -> ctx -> repo order
229 229 if b'fctx' in ctxs and b'ctx' not in ctxs:
230 230 ctxs[b'ctx'] = ctxs[b'fctx'].changectx()
231 231 if b'ctx' in ctxs and b'repo' not in ctxs:
232 232 ctxs[b'repo'] = ctxs[b'ctx'].repo()
233 233 self._item.update(ctxs)
234 234
235 235 def datahint(self):
236 236 '''set of field names to be referenced'''
237 237 return set()
238 238
239 239 def data(self, **data):
240 240 '''insert data into item that's not shown in default output'''
241 241 data = pycompat.byteskwargs(data)
242 242 self._item.update(data)
243 243
244 244 def write(self, fields, deftext, *fielddata, **opts):
245 245 '''do default text output while assigning data to item'''
246 246 fieldkeys = fields.split()
247 247 assert len(fieldkeys) == len(fielddata), (fieldkeys, fielddata)
248 248 self._item.update(zip(fieldkeys, fielddata))
249 249
250 250 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
251 251 '''do conditional write (primarily for plain formatter)'''
252 252 fieldkeys = fields.split()
253 253 assert len(fieldkeys) == len(fielddata)
254 254 self._item.update(zip(fieldkeys, fielddata))
255 255
256 256 def plain(self, text, **opts):
257 257 '''show raw text for non-templated mode'''
258 258
259 259 def isplain(self):
260 260 '''check for plain formatter usage'''
261 261 return False
262 262
263 263 def nested(self, field, tmpl=None, sep=b''):
264 264 '''sub formatter to store nested data in the specified field'''
265 265 data = []
266 266 self._item[field] = self._converter.wrapnested(data, tmpl, sep)
267 267 return _nestedformatter(self._ui, self._converter, data)
268 268
269 269 def end(self):
270 270 '''end output for the formatter'''
271 271 if self._item is not None:
272 272 self._showitem()
273 273
274 274
275 275 def nullformatter(ui, topic, opts):
276 276 '''formatter that prints nothing'''
277 277 return baseformatter(ui, topic, opts, converter=_nullconverter)
278 278
279 279
280 280 class _nestedformatter(baseformatter):
281 281 '''build sub items and store them in the parent formatter'''
282 282
283 283 def __init__(self, ui, converter, data):
284 284 baseformatter.__init__(
285 285 self, ui, topic=b'', opts={}, converter=converter
286 286 )
287 287 self._data = data
288 288
289 289 def _showitem(self):
290 290 self._data.append(self._item)
291 291
292 292
293 293 def _iteritems(data):
294 294 '''iterate key-value pairs in stable order'''
295 295 if isinstance(data, dict):
296 296 return sorted(data.items())
297 297 return data
298 298
299 299
300 300 class _plainconverter(object):
301 301 '''convert non-primitive data types to text'''
302 302
303 303 storecontext = False
304 304
305 305 @staticmethod
306 306 def wrapnested(data, tmpl, sep):
307 307 raise error.ProgrammingError(b'plainformatter should never be nested')
308 308
309 309 @staticmethod
310 310 def formatdate(date, fmt):
311 311 '''stringify date tuple in the given format'''
312 312 return dateutil.datestr(date, fmt)
313 313
314 314 @staticmethod
315 315 def formatdict(data, key, value, fmt, sep):
316 316 '''stringify key-value pairs separated by sep'''
317 317 prefmt = pycompat.identity
318 318 if fmt is None:
319 319 fmt = b'%s=%s'
320 320 prefmt = pycompat.bytestr
321 321 return sep.join(
322 322 fmt % (prefmt(k), prefmt(v)) for k, v in _iteritems(data)
323 323 )
324 324
325 325 @staticmethod
326 326 def formatlist(data, name, fmt, sep):
327 327 '''stringify iterable separated by sep'''
328 328 prefmt = pycompat.identity
329 329 if fmt is None:
330 330 fmt = b'%s'
331 331 prefmt = pycompat.bytestr
332 332 return sep.join(fmt % prefmt(e) for e in data)
333 333
334 334
335 335 class plainformatter(baseformatter):
336 336 '''the default text output scheme'''
337 337
338 338 def __init__(self, ui, out, topic, opts):
339 339 baseformatter.__init__(self, ui, topic, opts, _plainconverter)
340 340 if ui.debugflag:
341 341 self.hexfunc = hex
342 342 else:
343 343 self.hexfunc = short
344 344 if ui is out:
345 345 self._write = ui.write
346 346 else:
347 347 self._write = lambda s, **opts: out.write(s)
348 348
349 349 def startitem(self):
350 350 pass
351 351
352 352 def data(self, **data):
353 353 pass
354 354
355 355 def write(self, fields, deftext, *fielddata, **opts):
356 356 self._write(deftext % fielddata, **opts)
357 357
358 358 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
359 359 '''do conditional write'''
360 360 if cond:
361 361 self._write(deftext % fielddata, **opts)
362 362
363 363 def plain(self, text, **opts):
364 364 self._write(text, **opts)
365 365
366 366 def isplain(self):
367 367 return True
368 368
369 369 def nested(self, field, tmpl=None, sep=b''):
370 370 # nested data will be directly written to ui
371 371 return self
372 372
373 373 def end(self):
374 374 pass
375 375
376 376
377 377 class debugformatter(baseformatter):
378 378 def __init__(self, ui, out, topic, opts):
379 379 baseformatter.__init__(self, ui, topic, opts, _nullconverter)
380 380 self._out = out
381 381 self._out.write(b"%s = [\n" % self._topic)
382 382
383 383 def _showitem(self):
384 384 self._out.write(
385 385 b' %s,\n' % stringutil.pprint(self._item, indent=4, level=1)
386 386 )
387 387
388 388 def end(self):
389 389 baseformatter.end(self)
390 390 self._out.write(b"]\n")
391 391
392 392
393 393 class pickleformatter(baseformatter):
394 394 def __init__(self, ui, out, topic, opts):
395 395 baseformatter.__init__(self, ui, topic, opts, _nullconverter)
396 396 self._out = out
397 397 self._data = []
398 398
399 399 def _showitem(self):
400 400 self._data.append(self._item)
401 401
402 402 def end(self):
403 403 baseformatter.end(self)
404 404 self._out.write(pickle.dumps(self._data))
405 405
406 406
407 407 class cborformatter(baseformatter):
408 408 '''serialize items as an indefinite-length CBOR array'''
409 409
410 410 def __init__(self, ui, out, topic, opts):
411 411 baseformatter.__init__(self, ui, topic, opts, _nullconverter)
412 412 self._out = out
413 413 self._out.write(cborutil.BEGIN_INDEFINITE_ARRAY)
414 414
415 415 def _showitem(self):
416 416 self._out.write(b''.join(cborutil.streamencode(self._item)))
417 417
418 418 def end(self):
419 419 baseformatter.end(self)
420 420 self._out.write(cborutil.BREAK)
421 421
422 422
423 423 class jsonformatter(baseformatter):
424 424
425 425 strict_format = True
426 426
427 427 def __init__(self, ui, out, topic, opts):
428 428 baseformatter.__init__(self, ui, topic, opts, _nullconverter)
429 429 self._out = out
430 430 self._out.write(b"[")
431 431 self._first = True
432 432
433 433 def _showitem(self):
434 434 if self._first:
435 435 self._first = False
436 436 else:
437 437 self._out.write(b",")
438 438
439 439 self._out.write(b"\n {\n")
440 440 first = True
441 441 for k, v in sorted(self._item.items()):
442 442 if first:
443 443 first = False
444 444 else:
445 445 self._out.write(b",\n")
446 446 u = templatefilters.json(v, paranoid=False)
447 447 self._out.write(b' "%s": %s' % (k, u))
448 448 self._out.write(b"\n }")
449 449
450 450 def end(self):
451 451 baseformatter.end(self)
452 452 self._out.write(b"\n]\n")
453 453
454 454
455 455 class _templateconverter(object):
456 456 '''convert non-primitive data types to be processed by templater'''
457 457
458 458 storecontext = True
459 459
460 460 @staticmethod
461 461 def wrapnested(data, tmpl, sep):
462 462 '''wrap nested data by templatable type'''
463 463 return templateutil.mappinglist(data, tmpl=tmpl, sep=sep)
464 464
465 465 @staticmethod
466 466 def formatdate(date, fmt):
467 467 '''return date tuple'''
468 468 return templateutil.date(date)
469 469
470 470 @staticmethod
471 471 def formatdict(data, key, value, fmt, sep):
472 472 '''build object that can be evaluated as either plain string or dict'''
473 473 data = util.sortdict(_iteritems(data))
474 474
475 475 def f():
476 476 yield _plainconverter.formatdict(data, key, value, fmt, sep)
477 477
478 478 return templateutil.hybriddict(
479 479 data, key=key, value=value, fmt=fmt, gen=f
480 480 )
481 481
482 482 @staticmethod
483 483 def formatlist(data, name, fmt, sep):
484 484 '''build object that can be evaluated as either plain string or list'''
485 485 data = list(data)
486 486
487 487 def f():
488 488 yield _plainconverter.formatlist(data, name, fmt, sep)
489 489
490 490 return templateutil.hybridlist(data, name=name, fmt=fmt, gen=f)
491 491
492 492
493 493 class templateformatter(baseformatter):
494 494 def __init__(self, ui, out, topic, opts, spec, overridetemplates=None):
495 495 baseformatter.__init__(self, ui, topic, opts, _templateconverter)
496 496 self._out = out
497 497 self._tref = spec.ref
498 498 self._t = loadtemplater(
499 499 ui,
500 500 spec,
501 501 defaults=templatekw.keywords,
502 502 resources=templateresources(ui),
503 503 cache=templatekw.defaulttempl,
504 504 )
505 505 if overridetemplates:
506 506 self._t.cache.update(overridetemplates)
507 507 self._parts = templatepartsmap(
508 508 spec, self._t, [b'docheader', b'docfooter', b'separator']
509 509 )
510 510 self._counter = itertools.count()
511 511 self._renderitem(b'docheader', {})
512 512
513 513 def _showitem(self):
514 514 item = self._item.copy()
515 515 item[b'index'] = index = next(self._counter)
516 516 if index > 0:
517 517 self._renderitem(b'separator', {})
518 518 self._renderitem(self._tref, item)
519 519
520 520 def _renderitem(self, part, item):
521 521 if part not in self._parts:
522 522 return
523 523 ref = self._parts[part]
524 524 # None can't be put in the mapping dict since it means <unset>
525 525 for k, v in item.items():
526 526 if v is None:
527 527 item[k] = templateutil.wrappedvalue(v)
528 528 self._out.write(self._t.render(ref, item))
529 529
530 530 @util.propertycache
531 531 def _symbolsused(self):
532 532 return self._t.symbolsused(self._tref)
533 533
534 534 def datahint(self):
535 535 '''set of field names to be referenced from the template'''
536 536 return self._symbolsused[0]
537 537
538 538 def end(self):
539 539 baseformatter.end(self)
540 540 self._renderitem(b'docfooter', {})
541 541
542 542
543 543 @attr.s(frozen=True)
544 544 class templatespec(object):
545 545 ref = attr.ib()
546 546 tmpl = attr.ib()
547 547 mapfile = attr.ib()
548 548 refargs = attr.ib(default=None)
549 549 fp = attr.ib(default=None)
550 550
551 551
552 552 def empty_templatespec():
553 553 return templatespec(None, None, None)
554 554
555 555
556 556 def reference_templatespec(ref, refargs=None):
557 557 return templatespec(ref, None, None, refargs)
558 558
559 559
560 560 def literal_templatespec(tmpl):
561 561 assert not isinstance(tmpl, str), b'tmpl must not be a str'
562 562 return templatespec(b'', tmpl, None)
563 563
564 564
565 565 def mapfile_templatespec(topic, mapfile, fp=None):
566 566 return templatespec(topic, None, mapfile, fp=fp)
567 567
568 568
569 569 def lookuptemplate(ui, topic, tmpl):
570 570 """Find the template matching the given -T/--template spec 'tmpl'
571 571
572 572 'tmpl' can be any of the following:
573 573
574 574 - a literal template (e.g. '{rev}')
575 575 - a reference to built-in template (i.e. formatter)
576 576 - a map-file name or path (e.g. 'changelog')
577 577 - a reference to [templates] in config file
578 578 - a path to raw template file
579 579
580 580 A map file defines a stand-alone template environment. If a map file is
581 581 selected, all templates defined in the file will be loaded, and the
582 582 template matching the given topic will be rendered. Aliases won't be
583 583 loaded from user config, but from the map file.
584 584
585 585 If no map file is selected, all templates in [templates] section will be
586 586 available as well as aliases in [templatealias].
587 587 """
588 588
589 589 if not tmpl:
590 590 return empty_templatespec()
591 591
592 592 # looks like a literal template?
593 593 if b'{' in tmpl:
594 594 return literal_templatespec(tmpl)
595 595
596 596 # a reference to built-in (formatter) template
597 597 if tmpl in {b'cbor', b'json', b'pickle', b'debug'}:
598 598 return reference_templatespec(tmpl)
599 599
600 600 # a function-style reference to built-in template
601 601 func, fsep, ftail = tmpl.partition(b'(')
602 602 if func in {b'cbor', b'json'} and fsep and ftail.endswith(b')'):
603 603 templater.parseexpr(tmpl) # make sure syntax errors are confined
604 604 return reference_templatespec(func, refargs=ftail[:-1])
605 605
606 606 # perhaps a stock style?
607 607 if not os.path.split(tmpl)[0]:
608 608 (mapname, fp) = templater.try_open_template(
609 609 b'map-cmdline.' + tmpl
610 610 ) or templater.try_open_template(tmpl)
611 611 if mapname:
612 612 return mapfile_templatespec(topic, mapname, fp)
613 613
614 614 # perhaps it's a reference to [templates]
615 615 if ui.config(b'templates', tmpl):
616 616 return reference_templatespec(tmpl)
617 617
618 618 if tmpl == b'list':
619 619 ui.write(_(b"available styles: %s\n") % templater.stylelist())
620 620 raise error.Abort(_(b"specify a template"))
621 621
622 622 # perhaps it's a path to a map or a template
623 623 if (b'/' in tmpl or b'\\' in tmpl) and os.path.isfile(tmpl):
624 624 # is it a mapfile for a style?
625 625 if os.path.basename(tmpl).startswith(b"map-"):
626 626 return mapfile_templatespec(topic, os.path.realpath(tmpl))
627 627 with util.posixfile(tmpl, b'rb') as f:
628 628 tmpl = f.read()
629 629 return literal_templatespec(tmpl)
630 630
631 631 # constant string?
632 632 return literal_templatespec(tmpl)
633 633
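
The resolution order above matters: any '{' makes the spec a literal template before name lookup is even attempted, built-in formatter names win next, then stock styles, [templates] entries, and finally filesystem paths. Illustrative outcomes (not doctests from the source):

    # lookuptemplate(ui, b'log', b'{rev}\n')    -> literal template spec
    # lookuptemplate(ui, b'log', b'json')       -> built-in formatter reference
    # lookuptemplate(ui, b'log', b'json(rev)')  -> function-style reference,
    #                                              refargs=b'rev'
    # lookuptemplate(ui, b'log', b'changelog')  -> map-file spec (stock style)
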
634 634
635 635 def templatepartsmap(spec, t, partnames):
636 636 """Create a mapping of {part: ref}"""
637 637 partsmap = {spec.ref: spec.ref} # initial ref must exist in t
638 638 if spec.mapfile:
639 639 partsmap.update((p, p) for p in partnames if p in t)
640 640 elif spec.ref:
641 641 for part in partnames:
642 642 ref = b'%s:%s' % (spec.ref, part) # select config sub-section
643 643 if ref in t:
644 644 partsmap[part] = ref
645 645 return partsmap
646 646
647 647
648 648 def loadtemplater(ui, spec, defaults=None, resources=None, cache=None):
649 649 """Create a templater from either a literal template or loading from
650 650 a map file"""
651 651 assert not (spec.tmpl and spec.mapfile)
652 652 if spec.mapfile:
653 653 return templater.templater.frommapfile(
654 654 spec.mapfile,
655 655 spec.fp,
656 656 defaults=defaults,
657 657 resources=resources,
658 658 cache=cache,
659 659 )
660 660 return maketemplater(
661 661 ui, spec.tmpl, defaults=defaults, resources=resources, cache=cache
662 662 )
663 663
664 664
665 665 def maketemplater(ui, tmpl, defaults=None, resources=None, cache=None):
666 666 """Create a templater from a string template 'tmpl'"""
667 667 aliases = ui.configitems(b'templatealias')
668 668 t = templater.templater(
669 669 defaults=defaults, resources=resources, cache=cache, aliases=aliases
670 670 )
671 671 t.cache.update(
672 672 (k, templater.unquotestring(v)) for k, v in ui.configitems(b'templates')
673 673 )
674 674 if tmpl:
675 675 t.cache[b''] = tmpl
676 676 return t
677 677
678 678
679 679 # marker to denote a resource to be loaded on demand based on mapping values
680 680 # (e.g. (ctx, path) -> fctx)
681 681 _placeholder = object()
682 682
683 683
684 684 class templateresources(templater.resourcemapper):
685 685 """Resource mapper designed for the default templatekw and function"""
686 686
687 687 def __init__(self, ui, repo=None):
688 688 self._resmap = {
689 689 b'cache': {}, # for templatekw/funcs to store reusable data
690 690 b'repo': repo,
691 691 b'ui': ui,
692 692 }
693 693
694 694 def availablekeys(self, mapping):
695 695 return {
696 696 k for k in self.knownkeys() if self._getsome(mapping, k) is not None
697 697 }
698 698
699 699 def knownkeys(self):
700 700 return {b'cache', b'ctx', b'fctx', b'repo', b'revcache', b'ui'}
701 701
702 702 def lookup(self, mapping, key):
703 703 if key not in self.knownkeys():
704 704 return None
705 705 v = self._getsome(mapping, key)
706 706 if v is _placeholder:
707 707 v = mapping[key] = self._loadermap[key](self, mapping)
708 708 return v
709 709
710 710 def populatemap(self, context, origmapping, newmapping):
711 711 mapping = {}
712 712 if self._hasnodespec(newmapping):
713 713 mapping[b'revcache'] = {} # per-ctx cache
714 714 if self._hasnodespec(origmapping) and self._hasnodespec(newmapping):
715 715 orignode = templateutil.runsymbol(context, origmapping, b'node')
716 716 mapping[b'originalnode'] = orignode
717 717 # put marker to override 'ctx'/'fctx' in mapping if any, and flag
718 718 # its existence to be reported by availablekeys()
719 719 if b'ctx' not in newmapping and self._hasliteral(newmapping, b'node'):
720 720 mapping[b'ctx'] = _placeholder
721 721 if b'fctx' not in newmapping and self._hasliteral(newmapping, b'path'):
722 722 mapping[b'fctx'] = _placeholder
723 723 return mapping
724 724
725 725 def _getsome(self, mapping, key):
726 726 v = mapping.get(key)
727 727 if v is not None:
728 728 return v
729 729 return self._resmap.get(key)
730 730
731 731 def _hasliteral(self, mapping, key):
732 732 """Test if a literal value is set or unset in the given mapping"""
733 733 return key in mapping and not callable(mapping[key])
734 734
735 735 def _getliteral(self, mapping, key):
736 736 """Return value of the given name if it is a literal"""
737 737 v = mapping.get(key)
738 738 if callable(v):
739 739 return None
740 740 return v
741 741
742 742 def _hasnodespec(self, mapping):
743 743 """Test if context revision is set or unset in the given mapping"""
744 744 return b'node' in mapping or b'ctx' in mapping
745 745
746 746 def _loadctx(self, mapping):
747 747 repo = self._getsome(mapping, b'repo')
748 748 node = self._getliteral(mapping, b'node')
749 749 if repo is None or node is None:
750 750 return
751 751 try:
752 752 return repo[node]
753 753 except error.RepoLookupError:
754 754 return None # maybe hidden/non-existent node
755 755
756 756 def _loadfctx(self, mapping):
757 757 ctx = self._getsome(mapping, b'ctx')
758 758 path = self._getliteral(mapping, b'path')
759 759 if ctx is None or path is None:
760 760 return None
761 761 try:
762 762 return ctx[path]
763 763 except error.LookupError:
764 764 return None # maybe removed file?
765 765
766 766 _loadermap = {
767 767 b'ctx': _loadctx,
768 768 b'fctx': _loadfctx,
769 769 }
770 770
771 771
772 772 def _internaltemplateformatter(
773 773 ui,
774 774 out,
775 775 topic,
776 776 opts,
777 777 spec,
778 778 tmpl,
779 779 docheader=b'',
780 780 docfooter=b'',
781 781 separator=b'',
782 782 ):
783 783 """Build template formatter that handles customizable built-in templates
784 784 such as -Tjson(...)"""
785 785 templates = {spec.ref: tmpl}
786 786 if docheader:
787 787 templates[b'%s:docheader' % spec.ref] = docheader
788 788 if docfooter:
789 789 templates[b'%s:docfooter' % spec.ref] = docfooter
790 790 if separator:
791 791 templates[b'%s:separator' % spec.ref] = separator
792 792 return templateformatter(
793 793 ui, out, topic, opts, spec, overridetemplates=templates
794 794 )
795 795
796 796
797 797 def formatter(ui, out, topic, opts):
798 798 spec = lookuptemplate(ui, topic, opts.get(b'template', b''))
799 799 if spec.ref == b"cbor" and spec.refargs is not None:
800 800 return _internaltemplateformatter(
801 801 ui,
802 802 out,
803 803 topic,
804 804 opts,
805 805 spec,
806 806 tmpl=b'{dict(%s)|cbor}' % spec.refargs,
807 807 docheader=cborutil.BEGIN_INDEFINITE_ARRAY,
808 808 docfooter=cborutil.BREAK,
809 809 )
810 810 elif spec.ref == b"cbor":
811 811 return cborformatter(ui, out, topic, opts)
812 812 elif spec.ref == b"json" and spec.refargs is not None:
813 813 return _internaltemplateformatter(
814 814 ui,
815 815 out,
816 816 topic,
817 817 opts,
818 818 spec,
819 819 tmpl=b'{dict(%s)|json}' % spec.refargs,
820 820 docheader=b'[\n ',
821 821 docfooter=b'\n]\n',
822 822 separator=b',\n ',
823 823 )
824 824 elif spec.ref == b"json":
825 825 return jsonformatter(ui, out, topic, opts)
826 826 elif spec.ref == b"pickle":
827 827 assert spec.refargs is None, r'function-style not supported'
828 828 return pickleformatter(ui, out, topic, opts)
829 829 elif spec.ref == b"debug":
830 830 assert spec.refargs is None, r'function-style not supported'
831 831 return debugformatter(ui, out, topic, opts)
832 832 elif spec.ref or spec.tmpl or spec.mapfile:
833 833 assert spec.refargs is None, r'function-style not supported'
834 834 return templateformatter(ui, out, topic, opts, spec)
835 835 # developer config: ui.formatdebug
836 836 elif ui.configbool(b'ui', b'formatdebug'):
837 837 return debugformatter(ui, out, topic, opts)
838 838 # deprecated config: ui.formatjson
839 839 elif ui.configbool(b'ui', b'formatjson'):
840 840 return jsonformatter(ui, out, topic, opts)
841 841 return plainformatter(ui, out, topic, opts)
842 842
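
As a concrete illustration of the function-style dispatch above (an editorial sketch, not part of the change; the refargs value is hypothetical): a spec such as json(rev, node) carries its argument list in spec.refargs, and _internaltemplateformatter() wraps it in a dict template:

    refargs = b'rev, node'                  # as parsed from -T "json(rev, node)"
    tmpl = b'{dict(%s)|json}' % refargs     # template handed to templateformatter
    assert tmpl == b'{dict(rev, node)|json}'

The docheader/docfooter/separator entries then frame each rendered item so the overall output is a well-formed JSON array.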
843 843
844 844 @contextlib.contextmanager
845 845 def openformatter(ui, filename, topic, opts):
846 846 """Create a formatter that writes outputs to the specified file
847 847
848 848 Must be invoked using the 'with' statement.
849 849 """
850 850 with util.posixfile(filename, b'wb') as out:
851 851 with formatter(ui, out, topic, opts) as fm:
852 852 yield fm
853 853
854 854
855 855 @contextlib.contextmanager
856 856 def _neverending(fm):
857 857 yield fm
858 858
859 859
860 860 def maybereopen(fm, filename):
861 861 """Create a formatter backed by file if filename specified, else return
862 862 the given formatter
863 863
864 864 Must be invoked using the 'with' statement. This will never call fm.end()
865 865 of the given formatter.
866 866 """
867 867 if filename:
868 868 return openformatter(fm._ui, filename, fm._topic, fm._opts)
869 869 else:
870 870 return _neverending(fm)
@@ -1,556 +1,556 b''
1 1 # templatefilters.py - common template expansion filters
2 2 #
3 3 # Copyright 2005-2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10 import re
11 11 import time
12 12
13 13 from .i18n import _
14 14 from .node import hex
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 pycompat,
19 19 registrar,
20 20 smartset,
21 21 templateutil,
22 22 url,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 cborutil,
27 27 dateutil,
28 28 stringutil,
29 29 )
30 30
31 31 urlerr = util.urlerr
32 32 urlreq = util.urlreq
33 33
34 34 # filters are callables like:
35 35 # fn(obj)
36 36 # with:
37 37 # obj - object to be filtered (text, date, list and so on)
38 38 filters = {}
39 39
40 40 templatefilter = registrar.templatefilter(filters)
41 41
42 42
43 43 @templatefilter(b'addbreaks', intype=bytes)
44 44 def addbreaks(text):
45 45 """Any text. Add an XHTML "<br />" tag before the end of
46 46 every line except the last.
47 47 """
48 48 return text.replace(b'\n', b'<br/>\n')
49 49
50 50
51 51 agescales = [
52 52 (b"year", 3600 * 24 * 365, b'Y'),
53 53 (b"month", 3600 * 24 * 30, b'M'),
54 54 (b"week", 3600 * 24 * 7, b'W'),
55 55 (b"day", 3600 * 24, b'd'),
56 56 (b"hour", 3600, b'h'),
57 57 (b"minute", 60, b'm'),
58 58 (b"second", 1, b's'),
59 59 ]
60 60
61 61
62 62 @templatefilter(b'age', intype=templateutil.date)
63 63 def age(date, abbrev=False):
64 64 """Date. Returns a human-readable date/time difference between the
65 65 given date/time and the current date/time.
66 66 """
67 67
68 68 def plural(t, c):
69 69 if c == 1:
70 70 return t
71 71 return t + b"s"
72 72
73 73 def fmt(t, c, a):
74 74 if abbrev:
75 75 return b"%d%s" % (c, a)
76 76 return b"%d %s" % (c, plural(t, c))
77 77
78 78 now = time.time()
79 79 then = date[0]
80 80 future = False
81 81 if then > now:
82 82 future = True
83 83 delta = max(1, int(then - now))
84 84 if delta > agescales[0][1] * 30:
85 85 return b'in the distant future'
86 86 else:
87 87 delta = max(1, int(now - then))
88 88 if delta > agescales[0][1] * 2:
89 89 return dateutil.shortdate(date)
90 90
91 91 for t, s, a in agescales:
92 92 n = delta // s
93 93 if n >= 2 or s == 1:
94 94 if future:
95 95 return b'%s from now' % fmt(t, n, a)
96 96 return b'%s ago' % fmt(t, n, a)
97 97
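
As a standalone sketch of the scale selection above (plain Python mirroring the loop, with a hypothetical time difference; pluralization is hardcoded here since n > 1):

    agescales = [(b"year", 3600 * 24 * 365), (b"month", 3600 * 24 * 30),
                 (b"week", 3600 * 24 * 7), (b"day", 3600 * 24),
                 (b"hour", 3600), (b"minute", 60), (b"second", 1)]
    delta = 3 * 24 * 3600 + 7200            # roughly 3.1 days in the past
    for t, s in agescales:
        n = delta // s
        if n >= 2 or s == 1:                # first scale with at least 2 units
            assert b'%d %ss ago' % (n, t) == b'3 days ago'
            break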
98 98
99 99 @templatefilter(b'basename', intype=bytes)
100 100 def basename(path):
101 101 """Any text. Treats the text as a path, and returns the last
102 102 component of the path after splitting by the path separator.
103 103 For example, "foo/bar/baz" becomes "baz" and "foo/bar//" becomes "".
104 104 """
105 105 return os.path.basename(path)
106 106
107 107
108 108 def _tocborencodable(obj):
109 109 if isinstance(obj, smartset.abstractsmartset):
110 110 return list(obj)
111 111 return obj
112 112
113 113
114 114 @templatefilter(b'cbor')
115 115 def cbor(obj):
116 116 """Any object. Serializes the object to CBOR bytes."""
117 117 # cborutil is stricter about type than json() filter
118 118 obj = pycompat.rapply(_tocborencodable, obj)
119 119 return b''.join(cborutil.streamencode(obj))
120 120
121 121
122 122 @templatefilter(b'commondir')
123 123 def commondir(filelist):
124 124 """List of text. Treats each list item as file name with /
125 125 as path separator and returns the longest common directory
126 126 prefix shared by all list items.
127 127 Returns the empty string if no common prefix exists.
128 128
129 129 The list items are not normalized, i.e. "foo/../bar" is handled as
130 130 file "bar" in the directory "foo/..". Leading slashes are ignored.
131 131
132 132 For example, ["foo/bar/baz", "foo/baz/bar"] becomes "foo" and
133 133 ["foo/bar", "baz"] becomes "".
134 134 """
135 135
136 136 def common(a, b):
137 137 if len(a) > len(b):
138 138 a = b[: len(a)]
139 139 elif len(b) > len(a):
140 140 b = b[: len(a)]
141 141 if a == b:
142 142 return a
143 143 for i in pycompat.xrange(len(a)):
144 144 if a[i] != b[i]:
145 145 return a[:i]
146 146 return a
147 147
148 148 try:
149 149 if not filelist:
150 150 return b""
151 151 dirlist = [f.lstrip(b'/').split(b'/')[:-1] for f in filelist]
152 152 if len(dirlist) == 1:
153 153 return b'/'.join(dirlist[0])
154 154 a = min(dirlist)
155 155 b = max(dirlist)
156 156 # The common prefix of a and b is shared with all
157 157 # elements of the list since Python sorts lexicographically
158 158 # and [1, x] after [1].
159 159 return b'/'.join(common(a, b))
160 160 except TypeError:
161 161 raise error.ParseError(_(b'argument is not a list of text'))
162 162
163 163
164 164 @templatefilter(b'count')
165 165 def count(i):
166 166 """List or text. Returns the length as an integer."""
167 167 try:
168 168 return len(i)
169 169 except TypeError:
170 170 raise error.ParseError(_(b'not countable'))
171 171
172 172
173 173 @templatefilter(b'dirname', intype=bytes)
174 174 def dirname(path):
175 175 """Any text. Treats the text as a path, and strips the last
176 176 component of the path after splitting by the path separator.
177 177 """
178 178 return os.path.dirname(path)
179 179
180 180
181 181 @templatefilter(b'domain', intype=bytes)
182 182 def domain(author):
183 183 """Any text. Finds the first string that looks like an email
184 184 address, and extracts just the domain component. Example: ``User
185 185 <user@example.com>`` becomes ``example.com``.
186 186 """
187 187 f = author.find(b'@')
188 188 if f == -1:
189 189 return b''
190 190 author = author[f + 1 :]
191 191 f = author.find(b'>')
192 192 if f >= 0:
193 193 author = author[:f]
194 194 return author
195 195
196 196
197 197 @templatefilter(b'email', intype=bytes)
198 198 def email(text):
199 199 """Any text. Extracts the first string that looks like an email
200 200 address. Example: ``User <user@example.com>`` becomes
201 201 ``user@example.com``.
202 202 """
203 203 return stringutil.email(text)
204 204
205 205
206 206 @templatefilter(b'escape', intype=bytes)
207 207 def escape(text):
208 208 """Any text. Replaces the special XML/XHTML characters "&", "<"
209 209 and ">" with XML entities, and filters out NUL characters.
210 210 """
211 211 return url.escape(text.replace(b'\0', b''), True)
212 212
213 213
214 214 para_re = None
215 215 space_re = None
216 216
217 217
218 218 def fill(text, width, initindent=b'', hangindent=b''):
219 219 '''fill many paragraphs with optional indentation.'''
220 220 global para_re, space_re
221 221 if para_re is None:
222 222 para_re = re.compile(b'(\n\n|\n\\s*[-*]\\s*)', re.M)
223 223 space_re = re.compile(br' +')
224 224
225 225 def findparas():
226 226 start = 0
227 227 while True:
228 228 m = para_re.search(text, start)
229 229 if not m:
230 230 uctext = encoding.unifromlocal(text[start:])
231 231 w = len(uctext)
232 232 while w > 0 and uctext[w - 1].isspace():
233 233 w -= 1
234 234 yield (
235 235 encoding.unitolocal(uctext[:w]),
236 236 encoding.unitolocal(uctext[w:]),
237 237 )
238 238 break
239 239 yield text[start : m.start(0)], m.group(1)
240 240 start = m.end(1)
241 241
242 242 return b"".join(
243 243 [
244 244 stringutil.wrap(
245 245 space_re.sub(b' ', stringutil.wrap(para, width)),
246 246 width,
247 247 initindent,
248 248 hangindent,
249 249 )
250 250 + rest
251 251 for para, rest in findparas()
252 252 ]
253 253 )
254 254
255 255
256 256 @templatefilter(b'fill68', intype=bytes)
257 257 def fill68(text):
258 258 """Any text. Wraps the text to fit in 68 columns."""
259 259 return fill(text, 68)
260 260
261 261
262 262 @templatefilter(b'fill76', intype=bytes)
263 263 def fill76(text):
264 264 """Any text. Wraps the text to fit in 76 columns."""
265 265 return fill(text, 76)
266 266
267 267
268 268 @templatefilter(b'firstline', intype=bytes)
269 269 def firstline(text):
270 270 """Any text. Returns the first line of text."""
271 271 try:
272 272 return text.splitlines(True)[0].rstrip(b'\r\n')
273 273 except IndexError:
274 274 return b''
275 275
276 276
277 277 @templatefilter(b'hex', intype=bytes)
278 278 def hexfilter(text):
279 279 """Any text. Convert a binary Mercurial node identifier into
280 280 its long hexadecimal representation.
281 281 """
282 282 return hex(text)
283 283
284 284
285 285 @templatefilter(b'hgdate', intype=templateutil.date)
286 286 def hgdate(text):
287 287 """Date. Returns the date as a pair of numbers: "1157407993
288 288 25200" (Unix timestamp, timezone offset).
289 289 """
290 290 return b"%d %d" % text
291 291
292 292
293 293 @templatefilter(b'isodate', intype=templateutil.date)
294 294 def isodate(text):
295 295 """Date. Returns the date in ISO 8601 format: "2009-08-18 13:00
296 296 +0200".
297 297 """
298 298 return dateutil.datestr(text, b'%Y-%m-%d %H:%M %1%2')
299 299
300 300
301 301 @templatefilter(b'isodatesec', intype=templateutil.date)
302 302 def isodatesec(text):
303 303 """Date. Returns the date in ISO 8601 format, including
304 304 seconds: "2009-08-18 13:00:13 +0200". See also the rfc3339date
305 305 filter.
306 306 """
307 307 return dateutil.datestr(text, b'%Y-%m-%d %H:%M:%S %1%2')
308 308
309 309
310 310 def indent(text, prefix, firstline=b''):
311 311 '''indent each non-empty line of text after the first with prefix.'''
312 312 lines = text.splitlines()
313 313 num_lines = len(lines)
314 314 endswithnewline = text[-1:] == b'\n'
315 315
316 316 def indenter():
317 317 for i in pycompat.xrange(num_lines):
318 318 l = lines[i]
319 319 if l.strip():
320 320 yield prefix if i else firstline
321 321 yield l
322 322 if i < num_lines - 1 or endswithnewline:
323 323 yield b'\n'
324 324
325 325 return b"".join(indenter())
326 326
327 327
328 328 @templatefilter(b'json')
329 329 def json(obj, paranoid=True):
330 330 """Any object. Serializes the object to a JSON formatted text."""
331 331 if obj is None:
332 332 return b'null'
333 333 elif obj is False:
334 334 return b'false'
335 335 elif obj is True:
336 336 return b'true'
337 elif isinstance(obj, (int, pycompat.long, float)):
337 elif isinstance(obj, (int, int, float)):
338 338 return pycompat.bytestr(obj)
339 339 elif isinstance(obj, bytes):
340 340 return b'"%s"' % encoding.jsonescape(obj, paranoid=paranoid)
341 341 elif isinstance(obj, type(u'')):
342 342 raise error.ProgrammingError(
343 343 b'Mercurial only does output with bytes: %r' % obj
344 344 )
345 345 elif util.safehasattr(obj, b'keys'):
346 346 out = [
347 347 b'"%s": %s'
348 348 % (encoding.jsonescape(k, paranoid=paranoid), json(v, paranoid))
349 349 for k, v in sorted(obj.items())
350 350 ]
351 351 return b'{' + b', '.join(out) + b'}'
352 352 elif util.safehasattr(obj, b'__iter__'):
353 353 out = [json(i, paranoid) for i in obj]
354 354 return b'[' + b', '.join(out) + b']'
355 355 raise error.ProgrammingError(b'cannot encode %r' % obj)
356 356
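
A minimal round-trip sketch of the recursion above (assumes a Mercurial checkout on sys.path so mercurial.templatefilters is importable):

    from mercurial.templatefilters import json
    assert json(None) == b'null'
    assert json([1, b'a']) == b'[1, "a"]'        # lists recurse via __iter__
    assert json({b'x': True}) == b'{"x": true}'  # dicts recurse via keys()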
357 357
358 358 @templatefilter(b'lower', intype=bytes)
359 359 def lower(text):
360 360 """Any text. Converts the text to lowercase."""
361 361 return encoding.lower(text)
362 362
363 363
364 364 @templatefilter(b'nonempty', intype=bytes)
365 365 def nonempty(text):
366 366 """Any text. Returns '(none)' if the string is empty."""
367 367 return text or b"(none)"
368 368
369 369
370 370 @templatefilter(b'obfuscate', intype=bytes)
371 371 def obfuscate(text):
372 372 """Any text. Returns the input text rendered as a sequence of
373 373 XML entities.
374 374 """
375 375 text = pycompat.unicode(
376 376 text, pycompat.sysstr(encoding.encoding), r'replace'
377 377 )
378 378 return b''.join([b'&#%d;' % ord(c) for c in text])
379 379
380 380
381 381 @templatefilter(b'permissions', intype=bytes)
382 382 def permissions(flags):
383 383 if b"l" in flags:
384 384 return b"lrwxrwxrwx"
385 385 if b"x" in flags:
386 386 return b"-rwxr-xr-x"
387 387 return b"-rw-r--r--"
388 388
389 389
390 390 @templatefilter(b'person', intype=bytes)
391 391 def person(author):
392 392 """Any text. Returns the name before an email address,
393 393 interpreting it as per RFC 5322.
394 394 """
395 395 return stringutil.person(author)
396 396
397 397
398 398 @templatefilter(b'revescape', intype=bytes)
399 399 def revescape(text):
400 400 """Any text. Escapes all "special" characters, except @.
401 401 Forward slashes are escaped twice to prevent web servers from prematurely
402 402 unescaping them. For example, "@foo bar/baz" becomes "@foo%20bar%252Fbaz".
403 403 """
404 404 return urlreq.quote(text, safe=b'/@').replace(b'/', b'%252F')
405 405
406 406
407 407 @templatefilter(b'rfc3339date', intype=templateutil.date)
408 408 def rfc3339date(text):
409 409 """Date. Returns a date using the Internet date format
410 410 specified in RFC 3339: "2009-08-18T13:00:13+02:00".
411 411 """
412 412 return dateutil.datestr(text, b"%Y-%m-%dT%H:%M:%S%1:%2")
413 413
414 414
415 415 @templatefilter(b'rfc822date', intype=templateutil.date)
416 416 def rfc822date(text):
417 417 """Date. Returns a date using the same format used in email
418 418 headers: "Tue, 18 Aug 2009 13:00:13 +0200".
419 419 """
420 420 return dateutil.datestr(text, b"%a, %d %b %Y %H:%M:%S %1%2")
421 421
422 422
423 423 @templatefilter(b'short', intype=bytes)
424 424 def short(text):
425 425 """Changeset hash. Returns the short form of a changeset hash,
426 426 i.e. a 12 hexadecimal digit string.
427 427 """
428 428 return text[:12]
429 429
430 430
431 431 @templatefilter(b'shortbisect', intype=bytes)
432 432 def shortbisect(label):
433 433 """Any text. Treats `label` as a bisection status, and
434 434 returns a single-character representing the status (G: good, B: bad,
435 435 S: skipped, U: untested, I: ignored). Returns single space if `text`
436 436 is not a valid bisection status.
437 437 """
438 438 if label:
439 439 return label[0:1].upper()
440 440 return b' '
441 441
442 442
443 443 @templatefilter(b'shortdate', intype=templateutil.date)
444 444 def shortdate(text):
445 445 """Date. Returns a date like "2006-09-18"."""
446 446 return dateutil.shortdate(text)
447 447
448 448
449 449 @templatefilter(b'slashpath', intype=bytes)
450 450 def slashpath(path):
451 451 """Any text. Replaces the native path separator with slash."""
452 452 return util.pconvert(path)
453 453
454 454
455 455 @templatefilter(b'splitlines', intype=bytes)
456 456 def splitlines(text):
457 457 """Any text. Split text into a list of lines."""
458 458 return templateutil.hybridlist(text.splitlines(), name=b'line')
459 459
460 460
461 461 @templatefilter(b'stringescape', intype=bytes)
462 462 def stringescape(text):
463 463 return stringutil.escapestr(text)
464 464
465 465
466 466 @templatefilter(b'stringify', intype=bytes)
467 467 def stringify(thing):
468 468 """Any type. Turns the value into text by converting values into
469 469 text and concatenating them.
470 470 """
471 471 return thing # coerced by the intype
472 472
473 473
474 474 @templatefilter(b'stripdir', intype=bytes)
475 475 def stripdir(text):
476 476 """Treat the text as path and strip a directory level, if
477 477 possible. For example, "foo" and "foo/bar" becomes "foo".
478 478 """
479 479 dir = os.path.dirname(text)
480 480 if dir == b"":
481 481 return os.path.basename(text)
482 482 else:
483 483 return dir
484 484
485 485
486 486 @templatefilter(b'tabindent', intype=bytes)
487 487 def tabindent(text):
488 488 """Any text. Returns the text, with every non-empty line
489 489 except the first starting with a tab character.
490 490 """
491 491 return indent(text, b'\t')
492 492
493 493
494 494 @templatefilter(b'upper', intype=bytes)
495 495 def upper(text):
496 496 """Any text. Converts the text to uppercase."""
497 497 return encoding.upper(text)
498 498
499 499
500 500 @templatefilter(b'urlescape', intype=bytes)
501 501 def urlescape(text):
502 502 """Any text. Escapes all "special" characters. For example,
503 503 "foo bar" becomes "foo%20bar".
504 504 """
505 505 return urlreq.quote(text)
506 506
507 507
508 508 @templatefilter(b'user', intype=bytes)
509 509 def userfilter(text):
510 510 """Any text. Returns a short representation of a user name or email
511 511 address."""
512 512 return stringutil.shortuser(text)
513 513
514 514
515 515 @templatefilter(b'emailuser', intype=bytes)
516 516 def emailuser(text):
517 517 """Any text. Returns the user portion of an email address."""
518 518 return stringutil.emailuser(text)
519 519
520 520
521 521 @templatefilter(b'utf8', intype=bytes)
522 522 def utf8(text):
523 523 """Any text. Converts from the local character encoding to UTF-8."""
524 524 return encoding.fromlocal(text)
525 525
526 526
527 527 @templatefilter(b'xmlescape', intype=bytes)
528 528 def xmlescape(text):
529 529 text = (
530 530 text.replace(b'&', b'&amp;')
531 531 .replace(b'<', b'&lt;')
532 532 .replace(b'>', b'&gt;')
533 533 .replace(b'"', b'&quot;')
534 534 .replace(b"'", b'&#39;')
535 535 ) # &apos; invalid in HTML
536 536 return re.sub(b'[\x00-\x08\x0B\x0C\x0E-\x1F]', b' ', text)
537 537
538 538
539 539 def websub(text, websubtable):
540 540 """:websub: Any text. Only applies to hgweb. Applies the regular
541 541 expression replacements defined in the websub section.
542 542 """
543 543 if websubtable:
544 544 for regexp, format in websubtable:
545 545 text = regexp.sub(format, text)
546 546 return text
547 547
548 548
549 549 def loadfilter(ui, extname, registrarobj):
550 550 """Load template filter from specified registrarobj"""
551 551 for name, func in registrarobj._table.items():
552 552 filters[name] = func
553 553
554 554
555 555 # tell hggettext to extract docstrings from these functions:
556 556 i18nfunctions = filters.values()
@@ -1,1082 +1,1081 b''
1 1 # cborutil.py - CBOR extensions
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10 import sys
11 11
12 from .. import pycompat
13 12
14 13 # Very short version of RFC 7049...
15 14 #
16 15 # Each item begins with a byte. The 3 high bits of that byte denote the
17 16 # "major type." The lower 5 bits denote the "subtype." Each major type
18 17 # has its own encoding mechanism.
19 18 #
20 19 # Most types have lengths. However, bytestring, string, array, and map
21 20 # can be indefinite length. These are denoted by a subtype with value 31.
22 21 # Sub-components of those types then come afterwards and are terminated
23 22 # by a "break" byte.
24 23
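
For instance, a worked decomposition of one initial byte under the layout described above (an editorial sketch):

    initial = 0x82                        # 0b100_00010
    majortype = initial >> 5              # 4 == MAJOR_TYPE_ARRAY
    subtype = initial & 0b00011111        # 2: the array holds two items
    assert (majortype, subtype) == (4, 2)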
25 24 MAJOR_TYPE_UINT = 0
26 25 MAJOR_TYPE_NEGINT = 1
27 26 MAJOR_TYPE_BYTESTRING = 2
28 27 MAJOR_TYPE_STRING = 3
29 28 MAJOR_TYPE_ARRAY = 4
30 29 MAJOR_TYPE_MAP = 5
31 30 MAJOR_TYPE_SEMANTIC = 6
32 31 MAJOR_TYPE_SPECIAL = 7
33 32
34 33 SUBTYPE_MASK = 0b00011111
35 34
36 35 SUBTYPE_FALSE = 20
37 36 SUBTYPE_TRUE = 21
38 37 SUBTYPE_NULL = 22
39 38 SUBTYPE_HALF_FLOAT = 25
40 39 SUBTYPE_SINGLE_FLOAT = 26
41 40 SUBTYPE_DOUBLE_FLOAT = 27
42 41 SUBTYPE_INDEFINITE = 31
43 42
44 43 SEMANTIC_TAG_FINITE_SET = 258
45 44
46 45 # Indefinite types begin with their major type ORed with information value 31.
47 46 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
48 47 '>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE
49 48 )
50 49 BEGIN_INDEFINITE_ARRAY = struct.pack(
51 50 '>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE
52 51 )
53 52 BEGIN_INDEFINITE_MAP = struct.pack(
54 53 '>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE
55 54 )
56 55
57 56 ENCODED_LENGTH_1 = struct.Struct('>B')
58 57 ENCODED_LENGTH_2 = struct.Struct('>BB')
59 58 ENCODED_LENGTH_3 = struct.Struct('>BH')
60 59 ENCODED_LENGTH_4 = struct.Struct('>BL')
61 60 ENCODED_LENGTH_5 = struct.Struct('>BQ')
62 61
63 62 # The break ends an indefinite length item.
64 63 BREAK = b'\xff'
65 64 BREAK_INT = 255
66 65
67 66
68 67 def encodelength(majortype, length):
69 68 """Obtain a value encoding the major type and its length."""
70 69 if length < 24:
71 70 return ENCODED_LENGTH_1.pack(majortype << 5 | length)
72 71 elif length < 256:
73 72 return ENCODED_LENGTH_2.pack(majortype << 5 | 24, length)
74 73 elif length < 65536:
75 74 return ENCODED_LENGTH_3.pack(majortype << 5 | 25, length)
76 75 elif length < 4294967296:
77 76 return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
78 77 else:
79 78 return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
80 79
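
A quick sanity check of the thresholds above (assumes mercurial.utils.cborutil is importable):

    from mercurial.utils import cborutil
    # lengths below 24 ride along in the initial byte
    assert cborutil.encodelength(0, 5) == b'\x05'
    # subtype 25 means a big-endian 16-bit length follows
    assert cborutil.encodelength(0, 500) == b'\x19\x01\xf4'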
81 80
82 81 def streamencodebytestring(v):
83 82 yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
84 83 yield v
85 84
86 85
87 86 def streamencodebytestringfromiter(it):
88 87 """Convert an iterator of chunks to an indefinite bytestring.
89 88
90 89 Given an input that is iterable and each element in the iterator is
91 90 representable as bytes, emit an indefinite length bytestring.
92 91 """
93 92 yield BEGIN_INDEFINITE_BYTESTRING
94 93
95 94 for chunk in it:
96 95 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
97 96 yield chunk
98 97
99 98 yield BREAK
100 99
101 100
102 101 def streamencodeindefinitebytestring(source, chunksize=65536):
103 102 """Given a large source buffer, emit as an indefinite length bytestring.
104 103
105 104 This is a generator of chunks constituting the encoded CBOR data.
106 105 """
107 106 yield BEGIN_INDEFINITE_BYTESTRING
108 107
109 108 i = 0
110 109 l = len(source)
111 110
112 111 while True:
113 112 chunk = source[i : i + chunksize]
114 113 i += len(chunk)
115 114
116 115 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
117 116 yield chunk
118 117
119 118 if i >= l:
120 119 break
121 120
122 121 yield BREAK
123 122
124 123
125 124 def streamencodeint(v):
126 125 if v >= 18446744073709551616 or v < -18446744073709551616:
127 126 raise ValueError(b'big integers not supported')
128 127
129 128 if v >= 0:
130 129 yield encodelength(MAJOR_TYPE_UINT, v)
131 130 else:
132 131 yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
133 132
134 133
135 134 def streamencodearray(l):
136 135 """Encode a known size iterable to an array."""
137 136
138 137 yield encodelength(MAJOR_TYPE_ARRAY, len(l))
139 138
140 139 for i in l:
141 140 for chunk in streamencode(i):
142 141 yield chunk
143 142
144 143
145 144 def streamencodearrayfromiter(it):
146 145 """Encode an iterator of items to an indefinite length array."""
147 146
148 147 yield BEGIN_INDEFINITE_ARRAY
149 148
150 149 for i in it:
151 150 for chunk in streamencode(i):
152 151 yield chunk
153 152
154 153 yield BREAK
155 154
156 155
157 156 def _mixedtypesortkey(v):
158 157 return type(v).__name__, v
159 158
160 159
161 160 def streamencodeset(s):
162 161 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
163 162 # semantic tag 258 for finite sets.
164 163 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
165 164
166 165 for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
167 166 yield chunk
168 167
169 168
170 169 def streamencodemap(d):
171 170 """Encode dictionary to a generator.
172 171
173 172 Does not support indefinite length dictionaries.
174 173 """
175 174 yield encodelength(MAJOR_TYPE_MAP, len(d))
176 175
177 176 for key, value in sorted(d.items(), key=lambda x: _mixedtypesortkey(x[0])):
178 177 for chunk in streamencode(key):
179 178 yield chunk
180 179 for chunk in streamencode(value):
181 180 yield chunk
182 181
183 182
184 183 def streamencodemapfromiter(it):
185 184 """Given an iterable of (key, value), encode to an indefinite length map."""
186 185 yield BEGIN_INDEFINITE_MAP
187 186
188 187 for key, value in it:
189 188 for chunk in streamencode(key):
190 189 yield chunk
191 190 for chunk in streamencode(value):
192 191 yield chunk
193 192
194 193 yield BREAK
195 194
196 195
197 196 def streamencodebool(b):
198 197 # major type 7, simple value 20 and 21.
199 198 yield b'\xf5' if b else b'\xf4'
200 199
201 200
202 201 def streamencodenone(v):
203 202 # major type 7, simple value 22.
204 203 yield b'\xf6'
205 204
206 205
207 206 STREAM_ENCODERS = {
208 207 bytes: streamencodebytestring,
209 208 int: streamencodeint,
210 pycompat.long: streamencodeint,
209 int: streamencodeint,
211 210 list: streamencodearray,
212 211 tuple: streamencodearray,
213 212 dict: streamencodemap,
214 213 set: streamencodeset,
215 214 bool: streamencodebool,
216 215 type(None): streamencodenone,
217 216 }
218 217
219 218
220 219 def streamencode(v):
221 220 """Encode a value in a streaming manner.
222 221
223 222 Given an input object, encode it to CBOR recursively.
224 223
225 224 Returns a generator of CBOR encoded bytes. There is no guarantee
226 225 that each emitted chunk fully decodes to a value or sub-value.
227 226
228 227 Encoding is deterministic - unordered collections are sorted.
229 228 """
230 229 fn = STREAM_ENCODERS.get(v.__class__)
231 230
232 231 if not fn:
233 232 # handle subtypes such as encoding.localstr and util.sortdict
234 233 for ty in STREAM_ENCODERS:
235 234 if not isinstance(v, ty):
236 235 continue
237 236 fn = STREAM_ENCODERS[ty]
238 237 break
239 238
240 239 if not fn:
241 240 raise ValueError(b'do not know how to encode %s' % type(v))
242 241
243 242 return fn(v)
244 243
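
A round-trip usage sketch for the encoder entry point (same import assumption; decodeall() is defined later in this module):

    from mercurial.utils import cborutil
    encoded = b''.join(cborutil.streamencode({b'key': [1, 2]}))
    assert cborutil.decodeall(encoded) == [{b'key': [1, 2]}]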
245 244
246 245 class CBORDecodeError(Exception):
247 246 """Represents an error decoding CBOR."""
248 247
249 248
250 249 if sys.version_info.major >= 3:
251 250
252 251 def _elementtointeger(b, i):
253 252 return b[i]
254 253
255 254
256 255 else:
257 256
258 257 def _elementtointeger(b, i):
259 258 return ord(b[i])
260 259
261 260
262 261 STRUCT_BIG_UBYTE = struct.Struct('>B')
263 262 STRUCT_BIG_USHORT = struct.Struct(b'>H')
264 263 STRUCT_BIG_ULONG = struct.Struct(b'>L')
265 264 STRUCT_BIG_ULONGLONG = struct.Struct(b'>Q')
266 265
267 266 SPECIAL_NONE = 0
268 267 SPECIAL_START_INDEFINITE_BYTESTRING = 1
269 268 SPECIAL_START_ARRAY = 2
270 269 SPECIAL_START_MAP = 3
271 270 SPECIAL_START_SET = 4
272 271 SPECIAL_INDEFINITE_BREAK = 5
273 272
274 273
275 274 def decodeitem(b, offset=0):
276 275 """Decode a new CBOR value from a buffer at offset.
277 276
278 277 This function attempts to decode up to one complete CBOR value
279 278 from ``b`` starting at offset ``offset``.
280 279
281 280 The beginning of a collection (such as an array, map, set, or
282 281 indefinite length bytestring) counts as a single value. For these
283 282 special cases, a state flag will indicate that a special value was seen.
284 283
285 284 When called, the function either returns a decoded value or gives
286 285 a hint as to how many more bytes are needed to do so. By calling
287 286 the function repeatedly given a stream of bytes, the caller can
288 287 build up the original values.
289 288
290 289 Returns a tuple with the following elements:
291 290
292 291 * Bool indicating whether a complete value was decoded.
293 292 * A decoded value if the first element is True, otherwise None
294 293 * Integer number of bytes. If positive, the number of bytes
295 294 read. If negative, the number of bytes we need to read to
296 295 decode this value or the next chunk in this value.
297 296 * One of the ``SPECIAL_*`` constants indicating special treatment
298 297 for this value. ``SPECIAL_NONE`` means this is a fully decoded
299 298 simple value (such as an integer or bool).
300 299 """
301 300
302 301 initial = _elementtointeger(b, offset)
303 302 offset += 1
304 303
305 304 majortype = initial >> 5
306 305 subtype = initial & SUBTYPE_MASK
307 306
308 307 if majortype == MAJOR_TYPE_UINT:
309 308 complete, value, readcount = decodeuint(subtype, b, offset)
310 309
311 310 if complete:
312 311 return True, value, readcount + 1, SPECIAL_NONE
313 312 else:
314 313 return False, None, readcount, SPECIAL_NONE
315 314
316 315 elif majortype == MAJOR_TYPE_NEGINT:
317 316 # Negative integers are the same as UINT except inverted minus 1.
318 317 complete, value, readcount = decodeuint(subtype, b, offset)
319 318
320 319 if complete:
321 320 return True, -value - 1, readcount + 1, SPECIAL_NONE
322 321 else:
323 322 return False, None, readcount, SPECIAL_NONE
324 323
325 324 elif majortype == MAJOR_TYPE_BYTESTRING:
326 325 # Beginnings of bytestrings are treated as uints in order to
327 326 # decode their length, which may be indefinite.
328 327 complete, size, readcount = decodeuint(
329 328 subtype, b, offset, allowindefinite=True
330 329 )
331 330
332 331 # We don't yet know the size of the bytestring. It must be a definite
333 332 # length since the indefinite subtype would be encoded in the initial
334 333 # byte.
335 334 if not complete:
336 335 return False, None, readcount, SPECIAL_NONE
337 336
338 337 # We know the length of the bytestring.
339 338 if size is not None:
340 339 # And the data is available in the buffer.
341 340 if offset + readcount + size <= len(b):
342 341 value = b[offset + readcount : offset + readcount + size]
343 342 return True, value, readcount + size + 1, SPECIAL_NONE
344 343
345 344 # And we need more data in order to return the bytestring.
346 345 else:
347 346 wanted = len(b) - offset - readcount - size
348 347 return False, None, wanted, SPECIAL_NONE
349 348
350 349 # It is an indefinite length bytestring.
351 350 else:
352 351 return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING
353 352
354 353 elif majortype == MAJOR_TYPE_STRING:
355 354 raise CBORDecodeError(b'string major type not supported')
356 355
357 356 elif majortype == MAJOR_TYPE_ARRAY:
358 357 # Beginnings of arrays are treated as uints in order to decode their
359 358 # length. We don't allow indefinite length arrays.
360 359 complete, size, readcount = decodeuint(subtype, b, offset)
361 360
362 361 if complete:
363 362 return True, size, readcount + 1, SPECIAL_START_ARRAY
364 363 else:
365 364 return False, None, readcount, SPECIAL_NONE
366 365
367 366 elif majortype == MAJOR_TYPE_MAP:
368 367 # Beginnings of maps are treated as uints in order to decode their
369 368 # number of elements. We don't allow indefinite length maps.
370 369 complete, size, readcount = decodeuint(subtype, b, offset)
371 370
372 371 if complete:
373 372 return True, size, readcount + 1, SPECIAL_START_MAP
374 373 else:
375 374 return False, None, readcount, SPECIAL_NONE
376 375
377 376 elif majortype == MAJOR_TYPE_SEMANTIC:
378 377 # Semantic tag value is read the same as a uint.
379 378 complete, tagvalue, readcount = decodeuint(subtype, b, offset)
380 379
381 380 if not complete:
382 381 return False, None, readcount, SPECIAL_NONE
383 382
384 383 # This behavior here is a little wonky. The main type being "decorated"
385 384 # by this semantic tag follows. A more robust parser would probably emit
386 385 # a special flag indicating this as a semantic tag and let the caller
387 386 # deal with the types that follow. But since we don't support many
388 387 # semantic tags, it is easier to deal with the special cases here and
389 388 # hide complexity from the caller. If we add support for more semantic
390 389 # tags, we should probably move semantic tag handling into the caller.
391 390 if tagvalue == SEMANTIC_TAG_FINITE_SET:
392 391 if offset + readcount >= len(b):
393 392 return False, None, -1, SPECIAL_NONE
394 393
395 394 complete, size, readcount2, special = decodeitem(
396 395 b, offset + readcount
397 396 )
398 397
399 398 if not complete:
400 399 return False, None, readcount2, SPECIAL_NONE
401 400
402 401 if special != SPECIAL_START_ARRAY:
403 402 raise CBORDecodeError(
404 403 b'expected array after finite set semantic tag'
405 404 )
406 405
407 406 return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
408 407
409 408 else:
410 409 raise CBORDecodeError(b'semantic tag %d not allowed' % tagvalue)
411 410
412 411 elif majortype == MAJOR_TYPE_SPECIAL:
413 412 # Only specific values for the information field are allowed.
414 413 if subtype == SUBTYPE_FALSE:
415 414 return True, False, 1, SPECIAL_NONE
416 415 elif subtype == SUBTYPE_TRUE:
417 416 return True, True, 1, SPECIAL_NONE
418 417 elif subtype == SUBTYPE_NULL:
419 418 return True, None, 1, SPECIAL_NONE
420 419 elif subtype == SUBTYPE_INDEFINITE:
421 420 return True, None, 1, SPECIAL_INDEFINITE_BREAK
422 421 # If value is 24, subtype is in next byte.
423 422 else:
424 423 raise CBORDecodeError(b'special type %d not allowed' % subtype)
425 424 else:
426 425 assert False
427 426
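
For example, the return convention in practice (same import assumption):

    from mercurial.utils import cborutil
    # 0x18 starts a uint whose value lives in the following byte
    assert cborutil.decodeitem(b'\x18\x64') == (True, 100, 2, 0)  # 0 == SPECIAL_NONE
    # with that byte missing, the negative count asks for one more byte
    assert cborutil.decodeitem(b'\x18') == (False, None, -1, 0)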
428 427
429 428 def decodeuint(subtype, b, offset=0, allowindefinite=False):
430 429 """Decode an unsigned integer.
431 430
432 431 ``subtype`` is the lower 5 bits from the initial byte CBOR item
433 432 "header." ``b`` is a buffer containing bytes. ``offset`` points to
434 433 the index of the first byte after the byte that ``subtype`` was
435 434 derived from.
436 435
437 436 ``allowindefinite`` allows the special indefinite length value
438 437 indicator.
439 438
440 439 Returns a 3-tuple of (successful, value, count).
441 440
442 441 The first element is a bool indicating if decoding completed. The 2nd
443 442 is the decoded integer value or None if not fully decoded or the subtype
444 443 is 31 and ``allowindefinite`` is True. The 3rd value is the count of bytes.
445 444 If positive, it is the number of additional bytes decoded. If negative,
446 445 it is the number of additional bytes needed to decode this value.
447 446 """
448 447
449 448 # Small values are inline.
450 449 if subtype < 24:
451 450 return True, subtype, 0
452 451 # Indefinite length specifier.
453 452 elif subtype == 31:
454 453 if allowindefinite:
455 454 return True, None, 0
456 455 else:
457 456 raise CBORDecodeError(b'indefinite length uint not allowed here')
458 457 elif subtype >= 28:
459 458 raise CBORDecodeError(
460 459 b'unsupported subtype on integer type: %d' % subtype
461 460 )
462 461
463 462 if subtype == 24:
464 463 s = STRUCT_BIG_UBYTE
465 464 elif subtype == 25:
466 465 s = STRUCT_BIG_USHORT
467 466 elif subtype == 26:
468 467 s = STRUCT_BIG_ULONG
469 468 elif subtype == 27:
470 469 s = STRUCT_BIG_ULONGLONG
471 470 else:
472 471 raise CBORDecodeError(b'bounds condition checking violation')
473 472
474 473 if len(b) - offset >= s.size:
475 474 return True, s.unpack_from(b, offset)[0], s.size
476 475 else:
477 476 return False, None, len(b) - offset - s.size
478 477
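
A small illustration of the subtype handling (same import assumption):

    from mercurial.utils import cborutil
    assert cborutil.decodeuint(5, b'') == (True, 5, 0)             # value inline
    assert cborutil.decodeuint(25, b'\x01\xf4') == (True, 500, 2)  # 16-bit follows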
479 478
480 479 class bytestringchunk(bytes):
481 480 """Represents a chunk/segment in an indefinite length bytestring.
482 481
483 482 This behaves like a ``bytes`` but in addition has the ``isfirst``
484 483 and ``islast`` attributes indicating whether this chunk is the first
485 484 or last in an indefinite length bytestring.
486 485 """
487 486
488 487 def __new__(cls, v, first=False, last=False):
489 488 self = bytes.__new__(cls, v)
490 489 self.isfirst = first
491 490 self.islast = last
492 491
493 492 return self
494 493
495 494
496 495 class sansiodecoder(object):
497 496 """A CBOR decoder that doesn't perform its own I/O.
498 497
499 498 To use, construct an instance and feed it segments containing
500 499 CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
501 500 indicates whether a fully-decoded value is available, how many bytes
502 501 were consumed, and offers a hint as to how many bytes should be fed
503 502 in next time to decode the next value.
504 503
505 504 The decoder assumes it will decode N discrete CBOR values, not just
506 505 a single value. i.e. if the bytestream contains uints packed one after
507 506 the other, the decoder will decode them all, rather than just the initial
508 507 one.
509 508
510 509 When ``decode()`` indicates a value is available, call ``getavailable()``
511 510 to return all fully decoded values.
512 511
513 512 ``decode()`` can partially decode input. It is up to the caller to keep
514 513 track of what data was consumed and to pass unconsumed data in on the
515 514 next invocation.
516 515
517 516 The decoder decodes atomically at the *item* level. See ``decodeitem()``.
518 517 If an *item* cannot be fully decoded, the decoder won't record it as
519 518 partially consumed. Instead, the caller will be instructed to pass in
520 519 the initial bytes of this item on the next invocation. This does result
521 520 in some redundant parsing. But the overhead should be minimal.
522 521
523 522 This decoder only supports a subset of CBOR as required by Mercurial.
524 523 It lacks support for:
525 524
526 525 * Indefinite length arrays
527 526 * Indefinite length maps
528 527 * Use of indefinite length bytestrings as keys or values within
529 528 arrays, maps, or sets.
530 529 * Nested arrays, maps, or sets within sets
531 530 * Any semantic tag that isn't a mathematical finite set
532 531 * Floating point numbers
533 532 * Undefined special value
534 533
535 534 CBOR types are decoded to Python types as follows:
536 535
537 536 uint -> int
538 537 negint -> int
539 538 bytestring -> bytes
540 539 map -> dict
541 540 array -> list
542 541 True -> bool
543 542 False -> bool
544 543 null -> None
545 544 indefinite length bytestring chunk -> [bytestringchunk]
546 545
547 546 The only non-obvious mapping here is an indefinite length bytestring
548 547 to the ``bytestringchunk`` type. This is to facilitate streaming
549 548 indefinite length bytestrings out of the decoder and to differentiate
550 549 a regular bytestring from an indefinite length bytestring.
551 550 """
552 551
553 552 _STATE_NONE = 0
554 553 _STATE_WANT_MAP_KEY = 1
555 554 _STATE_WANT_MAP_VALUE = 2
556 555 _STATE_WANT_ARRAY_VALUE = 3
557 556 _STATE_WANT_SET_VALUE = 4
558 557 _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
559 558 _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
560 559
561 560 def __init__(self):
562 561 # TODO add support for limiting size of bytestrings
563 562 # TODO add support for limiting number of keys / values in collections
564 563 # TODO add support for limiting size of buffered partial values
565 564
566 565 self.decodedbytecount = 0
567 566
568 567 self._state = self._STATE_NONE
569 568
570 569 # Stack of active nested collections. Each entry is a dict describing
571 570 # the collection.
572 571 self._collectionstack = []
573 572
574 573 # Fully decoded key to use for the current map.
575 574 self._currentmapkey = None
576 575
577 576 # Fully decoded values available for retrieval.
578 577 self._decodedvalues = []
579 578
580 579 @property
581 580 def inprogress(self):
582 581 """Whether the decoder has partially decoded a value."""
583 582 return self._state != self._STATE_NONE
584 583
585 584 def decode(self, b, offset=0):
586 585 """Attempt to decode bytes from an input buffer.
587 586
588 587 ``b`` is a collection of bytes and ``offset`` is the byte
589 588 offset within that buffer from which to begin reading data.
590 589
591 590 ``b`` must support ``len()`` and accessing bytes slices via
592 591 ``__slice__``. Typically ``bytes`` instances are used.
593 592
594 593 Returns a tuple with the following fields:
595 594
596 595 * Bool indicating whether values are available for retrieval.
597 596 * Integer indicating the number of bytes that were fully consumed,
598 597 starting from ``offset``.
599 598 * Integer indicating the number of bytes that are desired for the
600 599 next call in order to decode an item.
601 600 """
602 601 if not b:
603 602 return bool(self._decodedvalues), 0, 0
604 603
605 604 initialoffset = offset
606 605
607 606 # We could easily split the body of this loop into a function. But
608 607 # Python performance is sensitive to function calls and collections
609 608 # are composed of many items. So leaving this as a while loop could help
610 609 # with performance. One thing that may not help is the use of
611 610 # if..elif versus a lookup/dispatch table. There may be value
612 611 # in switching that.
613 612 while offset < len(b):
614 613 # Attempt to decode an item. This could be a whole value or a
615 614 # special value indicating an event, such as start or end of a
616 615 # collection or indefinite length type.
617 616 complete, value, readcount, special = decodeitem(b, offset)
618 617
619 618 if readcount > 0:
620 619 self.decodedbytecount += readcount
621 620
622 621 if not complete:
623 622 assert readcount < 0
624 623 return (
625 624 bool(self._decodedvalues),
626 625 offset - initialoffset,
627 626 -readcount,
628 627 )
629 628
630 629 offset += readcount
631 630
632 631 # No nested state. We either have a full value or beginning of a
633 632 # complex value to deal with.
634 633 if self._state == self._STATE_NONE:
635 634 # A normal value.
636 635 if special == SPECIAL_NONE:
637 636 self._decodedvalues.append(value)
638 637
639 638 elif special == SPECIAL_START_ARRAY:
640 639 self._collectionstack.append(
641 640 {
642 641 b'remaining': value,
643 642 b'v': [],
644 643 }
645 644 )
646 645 self._state = self._STATE_WANT_ARRAY_VALUE
647 646
648 647 elif special == SPECIAL_START_MAP:
649 648 self._collectionstack.append(
650 649 {
651 650 b'remaining': value,
652 651 b'v': {},
653 652 }
654 653 )
655 654 self._state = self._STATE_WANT_MAP_KEY
656 655
657 656 elif special == SPECIAL_START_SET:
658 657 self._collectionstack.append(
659 658 {
660 659 b'remaining': value,
661 660 b'v': set(),
662 661 }
663 662 )
664 663 self._state = self._STATE_WANT_SET_VALUE
665 664
666 665 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
667 666 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
668 667
669 668 else:
670 669 raise CBORDecodeError(
671 670 b'unhandled special state: %d' % special
672 671 )
673 672
674 673 # This value becomes an element of the current array.
675 674 elif self._state == self._STATE_WANT_ARRAY_VALUE:
676 675 # Simple values get appended.
677 676 if special == SPECIAL_NONE:
678 677 c = self._collectionstack[-1]
679 678 c[b'v'].append(value)
680 679 c[b'remaining'] -= 1
681 680
683 682 # self._state doesn't need to change.
683 682
684 683 # An array nested within an array.
685 684 elif special == SPECIAL_START_ARRAY:
686 685 lastc = self._collectionstack[-1]
687 686 newvalue = []
688 687
689 688 lastc[b'v'].append(newvalue)
690 689 lastc[b'remaining'] -= 1
691 690
692 691 self._collectionstack.append(
693 692 {
694 693 b'remaining': value,
695 694 b'v': newvalue,
696 695 }
697 696 )
698 697
700 699 # self._state doesn't need to change.
700 699
701 700 # A map nested within an array.
702 701 elif special == SPECIAL_START_MAP:
703 702 lastc = self._collectionstack[-1]
704 703 newvalue = {}
705 704
706 705 lastc[b'v'].append(newvalue)
707 706 lastc[b'remaining'] -= 1
708 707
709 708 self._collectionstack.append(
710 709 {b'remaining': value, b'v': newvalue}
711 710 )
712 711
713 712 self._state = self._STATE_WANT_MAP_KEY
714 713
715 714 elif special == SPECIAL_START_SET:
716 715 lastc = self._collectionstack[-1]
717 716 newvalue = set()
718 717
719 718 lastc[b'v'].append(newvalue)
720 719 lastc[b'remaining'] -= 1
721 720
722 721 self._collectionstack.append(
723 722 {
724 723 b'remaining': value,
725 724 b'v': newvalue,
726 725 }
727 726 )
728 727
729 728 self._state = self._STATE_WANT_SET_VALUE
730 729
731 730 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
732 731 raise CBORDecodeError(
733 732 b'indefinite length bytestrings '
734 733 b'not allowed as array values'
735 734 )
736 735
737 736 else:
738 737 raise CBORDecodeError(
739 738 b'unhandled special item when '
740 739 b'expecting array value: %d' % special
741 740 )
742 741
743 742 # This value becomes the key of the current map instance.
744 743 elif self._state == self._STATE_WANT_MAP_KEY:
745 744 if special == SPECIAL_NONE:
746 745 self._currentmapkey = value
747 746 self._state = self._STATE_WANT_MAP_VALUE
748 747
749 748 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
750 749 raise CBORDecodeError(
751 750 b'indefinite length bytestrings '
752 751 b'not allowed as map keys'
753 752 )
754 753
755 754 elif special in (
756 755 SPECIAL_START_ARRAY,
757 756 SPECIAL_START_MAP,
758 757 SPECIAL_START_SET,
759 758 ):
760 759 raise CBORDecodeError(
761 760 b'collections not supported as map keys'
762 761 )
763 762
764 763 # We do not allow special values to be used as map keys.
765 764 else:
766 765 raise CBORDecodeError(
767 766 b'unhandled special item when '
768 767 b'expecting map key: %d' % special
769 768 )
770 769
771 770 # This value becomes the value of the current map key.
772 771 elif self._state == self._STATE_WANT_MAP_VALUE:
773 772 # Simple values simply get inserted into the map.
774 773 if special == SPECIAL_NONE:
775 774 lastc = self._collectionstack[-1]
776 775 lastc[b'v'][self._currentmapkey] = value
777 776 lastc[b'remaining'] -= 1
778 777
779 778 self._state = self._STATE_WANT_MAP_KEY
780 779
781 780 # A new array is used as the map value.
782 781 elif special == SPECIAL_START_ARRAY:
783 782 lastc = self._collectionstack[-1]
784 783 newvalue = []
785 784
786 785 lastc[b'v'][self._currentmapkey] = newvalue
787 786 lastc[b'remaining'] -= 1
788 787
789 788 self._collectionstack.append(
790 789 {
791 790 b'remaining': value,
792 791 b'v': newvalue,
793 792 }
794 793 )
795 794
796 795 self._state = self._STATE_WANT_ARRAY_VALUE
797 796
798 797 # A new map is used as the map value.
799 798 elif special == SPECIAL_START_MAP:
800 799 lastc = self._collectionstack[-1]
801 800 newvalue = {}
802 801
803 802 lastc[b'v'][self._currentmapkey] = newvalue
804 803 lastc[b'remaining'] -= 1
805 804
806 805 self._collectionstack.append(
807 806 {
808 807 b'remaining': value,
809 808 b'v': newvalue,
810 809 }
811 810 )
812 811
813 812 self._state = self._STATE_WANT_MAP_KEY
814 813
815 814 # A new set is used as the map value.
816 815 elif special == SPECIAL_START_SET:
817 816 lastc = self._collectionstack[-1]
818 817 newvalue = set()
819 818
820 819 lastc[b'v'][self._currentmapkey] = newvalue
821 820 lastc[b'remaining'] -= 1
822 821
823 822 self._collectionstack.append(
824 823 {
825 824 b'remaining': value,
826 825 b'v': newvalue,
827 826 }
828 827 )
829 828
830 829 self._state = self._STATE_WANT_SET_VALUE
831 830
832 831 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
833 832 raise CBORDecodeError(
834 833 b'indefinite length bytestrings not '
835 834 b'allowed as map values'
836 835 )
837 836
838 837 else:
839 838 raise CBORDecodeError(
840 839 b'unhandled special item when '
841 840 b'expecting map value: %d' % special
842 841 )
843 842
844 843 self._currentmapkey = None
845 844
846 845 # This value is added to the current set.
847 846 elif self._state == self._STATE_WANT_SET_VALUE:
848 847 if special == SPECIAL_NONE:
849 848 lastc = self._collectionstack[-1]
850 849 lastc[b'v'].add(value)
851 850 lastc[b'remaining'] -= 1
852 851
853 852 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
854 853 raise CBORDecodeError(
855 854 b'indefinite length bytestrings not '
856 855 b'allowed as set values'
857 856 )
858 857
859 858 elif special in (
860 859 SPECIAL_START_ARRAY,
861 860 SPECIAL_START_MAP,
862 861 SPECIAL_START_SET,
863 862 ):
864 863 raise CBORDecodeError(
865 864 b'collections not allowed as set values'
866 865 )
867 866
868 867 # We don't allow non-trivial types to exist as set values.
869 868 else:
870 869 raise CBORDecodeError(
871 870 b'unhandled special item when '
872 871 b'expecting set value: %d' % special
873 872 )
874 873
875 874 # This value represents the first chunk in an indefinite length
876 875 # bytestring.
877 876 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
878 877 # We received a full chunk.
879 878 if special == SPECIAL_NONE:
880 879 self._decodedvalues.append(
881 880 bytestringchunk(value, first=True)
882 881 )
883 882
884 883 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
885 884
886 885 # The end of stream marker. This means it is an empty
887 886 # indefinite length bytestring.
888 887 elif special == SPECIAL_INDEFINITE_BREAK:
889 888 # We /could/ convert this to a b''. But we want to preserve
890 889 # the nature of the underlying data so consumers expecting
891 890 # an indefinite length bytestring get one.
892 891 self._decodedvalues.append(
893 892 bytestringchunk(b'', first=True, last=True)
894 893 )
895 894
896 895 # Since indefinite length bytestrings can't be used in
897 896 # collections, we must be at the root level.
898 897 assert not self._collectionstack
899 898 self._state = self._STATE_NONE
900 899
901 900 else:
902 901 raise CBORDecodeError(
903 902 b'unexpected special value when '
904 903 b'expecting bytestring chunk: %d' % special
905 904 )
906 905
907 906 # This value represents the non-initial chunk in an indefinite
908 907 # length bytestring.
909 908 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
910 909 # We received a full chunk.
911 910 if special == SPECIAL_NONE:
912 911 self._decodedvalues.append(bytestringchunk(value))
913 912
914 913 # The end of stream marker.
915 914 elif special == SPECIAL_INDEFINITE_BREAK:
916 915 self._decodedvalues.append(bytestringchunk(b'', last=True))
917 916
918 917 # Since indefinite length bytestrings can't be used in
919 918 # collections, we must be at the root level.
920 919 assert not self._collectionstack
921 920 self._state = self._STATE_NONE
922 921
923 922 else:
924 923 raise CBORDecodeError(
925 924 b'unexpected special value when '
926 925 b'expecting bytestring chunk: %d' % special
927 926 )
928 927
929 928 else:
930 929 raise CBORDecodeError(
931 930 b'unhandled decoder state: %d' % self._state
932 931 )
933 932
934 933 # We could have just added the final value in a collection. End
935 934 # all complete collections at the top of the stack.
936 935 while True:
937 936 # Bail if we're not waiting on a new collection item.
938 937 if self._state not in (
939 938 self._STATE_WANT_ARRAY_VALUE,
940 939 self._STATE_WANT_MAP_KEY,
941 940 self._STATE_WANT_SET_VALUE,
942 941 ):
943 942 break
944 943
945 944 # Or we are expecting more items for this collection.
946 945 lastc = self._collectionstack[-1]
947 946
948 947 if lastc[b'remaining']:
949 948 break
950 949
951 950 # The collection at the top of the stack is complete.
952 951
953 952 # Discard it, as it isn't needed for future items.
954 953 self._collectionstack.pop()
955 954
956 955 # If this is a nested collection, we don't emit it, since it
957 956 # will be emitted by its parent collection. But we do need to
958 957 # update state to reflect what the new top-most collection
959 958 # on the stack is.
960 959 if self._collectionstack:
961 960 self._state = {
962 961 list: self._STATE_WANT_ARRAY_VALUE,
963 962 dict: self._STATE_WANT_MAP_KEY,
964 963 set: self._STATE_WANT_SET_VALUE,
965 964 }[type(self._collectionstack[-1][b'v'])]
966 965
967 966 # If this is the root collection, emit it.
968 967 else:
969 968 self._decodedvalues.append(lastc[b'v'])
970 969 self._state = self._STATE_NONE
971 970
972 971 return (
973 972 bool(self._decodedvalues),
974 973 offset - initialoffset,
975 974 0,
976 975 )
977 976
978 977 def getavailable(self):
979 978 """Returns an iterator over fully decoded values.
980 979
981 980 Once values are retrieved, they won't be available on the next call.
982 981 """
983 982
984 983 l = list(self._decodedvalues)
985 984 self._decodedvalues = []
986 985 return l
987 986
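
A feeding sketch for the incremental API (same import assumption; the split point is arbitrary here, but any bytes the decoder did not consume must be re-fed by the caller):

    from mercurial.utils import cborutil
    data = b''.join(cborutil.streamencode([1, 2, 3]))  # b'\x83\x01\x02\x03'
    decoder = cborutil.sansiodecoder()
    decoder.decode(data[:2])                  # array header + first element
    havevalues, readcount, wanted = decoder.decode(data[2:])
    assert havevalues and decoder.getavailable() == [[1, 2, 3]]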
988 987
989 988 class bufferingdecoder(object):
990 989 """A CBOR decoder that buffers undecoded input.
991 990
992 991 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
993 992 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
994 993 and concatenated with any new input that arrives later.
995 994
996 995 TODO consider adding limits as to the maximum amount of data that can
997 996 be buffered.
998 997 """
999 998
1000 999 def __init__(self):
1001 1000 self._decoder = sansiodecoder()
1002 1001 self._chunks = []
1003 1002 self._wanted = 0
1004 1003
1005 1004 def decode(self, b):
1006 1005 """Attempt to decode bytes to CBOR values.
1007 1006
1008 1007 Returns a tuple with the following fields:
1009 1008
1010 1009 * Bool indicating whether new values are available for retrieval.
1011 1010 * Integer number of bytes decoded from the new input.
1012 1011 * Integer number of bytes wanted to decode the next value.
1013 1012 """
1014 1013 # We /might/ be able to support passing a bytearray all the
1015 1014 # way through. For now, let's cheat.
1016 1015 if isinstance(b, bytearray):
1017 1016 b = bytes(b)
1018 1017
1019 1018 # Our strategy for buffering is to aggregate the incoming chunks in a
1020 1019 # list until we've received enough data to decode the next item.
1021 1020 # This is slightly more complicated than using an ``io.BytesIO``
1022 1021 # or continuously concatenating incoming data. However, because it
1023 1022 # isn't constantly reallocating backing memory for a growing buffer,
1024 1023 # it prevents excessive memory thrashing and is significantly faster,
1025 1024 # especially in cases where the percentage of input chunks that don't
1026 1025 # decode into a full item is high.
1027 1026
1028 1027 if self._chunks:
1029 1028 # A previous call said we needed N bytes to decode the next item.
1030 1029 # But this call doesn't provide enough data. We buffer the incoming
1031 1030 # chunk without attempting to decode.
1032 1031 if len(b) < self._wanted:
1033 1032 self._chunks.append(b)
1034 1033 self._wanted -= len(b)
1035 1034 return False, 0, self._wanted
1036 1035
1037 1036 # Else we may have enough data to decode the next item. Aggregate
1038 1037 # old data with new and reset the buffer.
1039 1038 newlen = len(b)
1040 1039 self._chunks.append(b)
1041 1040 b = b''.join(self._chunks)
1042 1041 self._chunks = []
1043 1042 oldlen = len(b) - newlen
1044 1043
1045 1044 else:
1046 1045 oldlen = 0
1047 1046
1048 1047 available, readcount, wanted = self._decoder.decode(b)
1049 1048 self._wanted = wanted
1050 1049
1051 1050 if readcount < len(b):
1052 1051 self._chunks.append(b[readcount:])
1053 1052
1054 1053 return available, readcount - oldlen, wanted
1055 1054
1056 1055 def getavailable(self):
1057 1056 return self._decoder.getavailable()
1058 1057
1059 1058
1060 1059 def decodeall(b):
1061 1060 """Decode all CBOR items present in an iterable of bytes.
1062 1061
1063 1062 In addition to regular decode errors, raises CBORDecodeError if the
1064 1063 entirety of the passed buffer does not fully decode to complete CBOR
1065 1064 values. This includes failure to decode any value, incomplete collection
1066 1065 types, incomplete indefinite length items, and extra data at the end of
1067 1066 the buffer.
1068 1067 """
1069 1068 if not b:
1070 1069 return []
1071 1070
1072 1071 decoder = sansiodecoder()
1073 1072
1074 1073 havevalues, readcount, wantbytes = decoder.decode(b)
1075 1074
1076 1075 if readcount != len(b):
1077 1076 raise CBORDecodeError(b'input data not fully consumed')
1078 1077
1079 1078 if decoder.inprogress:
1080 1079 raise CBORDecodeError(b'input data not complete')
1081 1080
1082 1081 return decoder.getavailable()
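
A usage sketch, including the truncation error described in the docstring (same import assumption):

    from mercurial.utils import cborutil
    data = b''.join(cborutil.streamencode([1, 2, 3]))
    assert cborutil.decodeall(data) == [[1, 2, 3]]
    try:
        cborutil.decodeall(data[:-1])         # truncated: the array never completes
    except cborutil.CBORDecodeError:
        pass                                  # 'input data not complete'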